# Optimize distance: forecasting of Lorenz data as a function of catalog size

This notebook accompanies the following publication:
Paul Platzer, Arthur Avenas, Bertrand Chapron, Lucas Drumetz, Alexis Mouche, Léo Vinour. Distance Learning for Analog Methods. 2024. [⟨hal-04841334⟩](https://hal.science/hal-04841334)

It is used to run optimization algorithms for numerical experiments with Lorenz system data. In particular, we investigate the dependence of the optimal distance on the catalog size (i.e. the number of available data points).

This notebook requires loading a lot of data into memory. It was initially run on a laptop with 32GB of RAM. Running this notebook on your device might require minor modifications of the code.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from tqdm.notebook import tqdm
from sklearn.neighbors import NearestNeighbors
import sys
sys.path.append('../../functions/.')
from analogs import apply_transform, find_analogues, compute_weights, compute_diffs, compute_mae_mad, compute_error
from distance_learning import compute_gradient_MSE, compute_regularization, learn_distance

In [2]:
# Locations used by this notebook, given relative to the notebook's directory.
# Catalog chunks ('catalog_large_<j>.npz') are read from `data_folder`.
data_folder = "../../data/lorenz/"
# Presumably where results are written -- not used in the cells visible here.
output_folder = "../../output/lorenz/"

# Load catalog

In [3]:
# Load the catalog, which is stored as 10 separate .npz chunks.
# Every chunk contributes its 'traj_norm' and 'stds' arrays; the metadata
# entries (dt, Ntrain, tau, h_max) are overwritten at each iteration, so the
# values of the last chunk are the ones kept
# (presumably identical across chunks -- TODO confirm).
traj_norm = []; stds = []
for j in range(10):
    # np.load on a .npz returns an NpzFile that keeps the file open and reads
    # each array on access: read everything inside the `with` block so that
    # the file handle is closed as soon as the chunk has been loaded.
    with np.load(data_folder + 'catalog_large_'+str(j)+'.npz') as npzfile:
        traj_norm.append(npzfile['traj_norm'])
        stds.append(npzfile['stds'])
        dt = npzfile['dt']
        Ntrain = npzfile['Ntrain']
        tau = npzfile['tau']
        h_max = npzfile['h_max']
# Stack the per-chunk arrays along a new leading axis (one entry per chunk).
traj_norm = np.array(traj_norm)
stds = np.array(stds)

# Optimization: loop on catalog size

In [4]:
# Define training explanatory variable and forecast horizon
# Each catalog chunk is sub-sampled every int(tau/dt) time steps and truncated
# to its first Ntrain samples. train_y uses the same sampling shifted by
# hh_ind time steps, i.e. it is the state a time `horizon` after train_x.
subsample_step = int(tau/dt)
hh_ind = 32
horizon = dt * hh_ind

# Slicing returns views, so building the lists costs no extra memory; a single
# concatenate then copies every sample exactly once (instead of re-copying the
# growing array at each chunk).
train_x = np.concatenate(
    [traj[::subsample_step][:Ntrain] for traj in traj_norm], axis=0)
train_y = np.concatenate(
    [traj[hh_ind::subsample_step][:Ntrain] for traj in traj_norm], axis=0)

# Free the full-resolution catalog. NOTE: after this, the cell cannot be
# re-run without first re-running the catalog loading cell.
del traj_norm

In [14]:
# Set parameters for optimization
k = 200                        # number of analogues
nn_algo = 'kd_tree'            # nearest-neighbour search algorithm
loo = True                     # leave-one-out procedure
corr_length_train = 0          # margin for time-correlated data (adds 2*corr_length_train neighbours)
n_epoch = 60                   # number of epochs of the distance-learning algorithm
learning_rate_factor = 50      # learning rate = learning_rate_factor / initial MSE
regul_coef = [0]
transform_matrix = np.eye(3)   # initial transform: identity

### Choose sub-catalog sizes and number of permutations to make
# Sizes go from len(train_x)/10**4 to len(train_x)/10**1.5 (25 log-spaced values)
Ncats = np.round( len(train_x) / 10**np.linspace(4,1.5,25) ).astype(int)
Nperm = 10

# Initialize transform matrices to be stored
# (one optimized matrix and one MSE history per catalog size and permutation)
transform_optim = np.empty((len(Ncats), Nperm) + transform_matrix.shape)
mse_optim = np.empty((len(Ncats), Nperm, n_epoch+1))

for i_ncat in tqdm(range(len(Ncats))):

    ncat = Ncats[i_ncat]

    # Regularly sub-sample the full catalog so that at least ncat*Nperm
    # indices are available for the Nperm disjoint sub-catalogs.
    ind_step = int(len(train_x)/(ncat*Nperm))
    if ind_step < 1:
        raise ValueError(
            'Catalog too small: %d samples cannot provide %d disjoint '
            'sub-catalogs of size %d' % (len(train_x), Nperm, ncat))
    ind_subcat_noperm = np.arange( 0 , len(train_x) , ind_step )

    # Set permutation for sub catalogs
    # (the generator is re-created with the same seed for every catalog size)
    rs = np.random.RandomState(1312)
    permutation = rs.permutation(ind_subcat_noperm)

    for j_perm in range(Nperm):
        # Take subsample of catalog of size Ncats[i_ncat] from permutation
        ind_subcat_perm = permutation[j_perm*ncat:(j_perm+1)*ncat]

        train_x_sub = train_x[ind_subcat_perm]
        train_y_sub = train_y[ind_subcat_perm]

        # Every element of the sub-catalog is used for training
        Itrain = np.arange(len(ind_subcat_perm))

        # Error with the initial transform, used to scale the learning rate
        train_X_sub = apply_transform(train_x_sub, transform_matrix, Itrain)

        nn = NearestNeighbors( algorithm = nn_algo ,
                               n_neighbors = k + 1 + 2*corr_length_train ) # leave-one-out procedure + anticipating time-correlated data
        nn.fit(train_X_sub)

        mse_init = compute_error(train_X_sub, train_y_sub, Itrain, Itrain, k, nn,
                                 loo=loo, corr_length_train=corr_length_train,
                                 vector_out=False, error_type='MSE')

        learning_rate = learning_rate_factor / mse_init

        # The empty list is presumably the set of test indices (the log shows
        # MSE(test) = [nan]) -- TODO confirm against learn_distance.
        # The options set at the top of the cell (nn_algo, loo,
        # corr_length_train) are forwarded instead of hard-coded duplicates.
        # verbose_batch=True prints the MSE at every iteration (long output).
        result = learn_distance(train_x_sub, train_y_sub, transform_matrix, Itrain, Itrain, [],
                                k = k, nn_algo = nn_algo, error_type = 'MSE', n_epoch = n_epoch,
                                learning_rate = learning_rate, regul_coef = regul_coef,
                                loo = loo, corr_length_train = corr_length_train,
                                batch_size = len(train_X_sub),
                                verbose_batch = True)

        # result[0][-1]: presumably the transform after the last epoch;
        # result[2]: presumably the n_epoch+1 MSE values -- TODO confirm.
        transform_optim[i_ncat, j_perm] = result[0][-1].copy()
        mse_optim[i_ncat, j_perm] = result[2].copy()

  0%|          | 0/25 [00:00<?, ?it/s]

Starting distance-learning algorithm with the following parameters:
Error type = MSE
Transformation type = matrix (general linear transformation)
Number of analogues = 200
Learning rate = 45.47589682008376
Number of Epochs = 60
Mini-batch size = 1000
Regularization = [0]


  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 1.0994 ;  MSE(train) = 1.0994 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.7965 ;  MSE(train) = 0.7965 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0281 ;  MSE(train) = 0.0281 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0280 ;  MSE(train) = 0.0280 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0280 ;  MSE(train) = 0.0280 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0279 ;  MSE(train) = 0.0279 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0278 ;  MSE(train) = 0.0278 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0278 ;  MSE(train) = 0.0278 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0277 ;  MSE(train) = 0.0277 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0277 ;  MSE(train) = 0.0277 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0276 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 1.0887 ;  MSE(train) = 1.0887 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.6544 ;  MSE(train) = 0.6544 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.1209 ;  MSE(train) = 0.1209 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.1202 ;  MSE(train) = 0.1202 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.1195 ;  MSE(train) = 0.1195 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.1188 ;  MSE(train) = 0.1188 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.1180 ;  MSE(train) = 0.1180 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.1172 ;  MSE(train) = 0.1172 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.1163 ;  MSE(train) = 0.1163 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.1153 ;  MSE(train) = 0.1153 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.1142 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 1.0921 ;  MSE(train) = 1.0921 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.7827 ;  MSE(train) = 0.7827 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0326 ;  MSE(train) = 0.0326 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0288 ;  MSE(train) = 0.0288 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0271 ;  MSE(train) = 0.0271 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0261 ;  MSE(train) = 0.0261 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0254 ;  MSE(train) = 0.0254 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0248 ;  MSE(train) = 0.0248 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0243 ;  MSE(train) = 0.0243 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0239 ;  MSE(train) = 0.0239 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0236 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 1.1191 ;  MSE(train) = 1.1191 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.8345 ;  MSE(train) = 0.8345 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0235 ;  MSE(train) = 0.0235 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0235 ;  MSE(train) = 0.0235 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0235 ;  MSE(train) = 0.0235 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0234 ;  MSE(train) = 0.0234 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0234 ;  MSE(train) = 0.0234 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0233 ;  MSE(train) = 0.0233 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0233 ;  MSE(train) = 0.0233 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0233 ;  MSE(train) = 0.0233 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0232 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 1.0225 ;  MSE(train) = 1.0225 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.7063 ;  MSE(train) = 0.7063 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0733 ;  MSE(train) = 0.0733 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0726 ;  MSE(train) = 0.0726 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0721 ;  MSE(train) = 0.0721 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0716 ;  MSE(train) = 0.0716 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0712 ;  MSE(train) = 0.0712 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0708 ;  MSE(train) = 0.0708 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0704 ;  MSE(train) = 0.0704 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0700 ;  MSE(train) = 0.0700 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0697 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 1.1566 ;  MSE(train) = 1.1566 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.4906 ;  MSE(train) = 0.4906 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0856 ;  MSE(train) = 0.0856 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0848 ;  MSE(train) = 0.0848 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0841 ;  MSE(train) = 0.0841 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0834 ;  MSE(train) = 0.0834 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0827 ;  MSE(train) = 0.0827 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0821 ;  MSE(train) = 0.0821 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0815 ;  MSE(train) = 0.0815 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0809 ;  MSE(train) = 0.0809 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0803 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 1.1416 ;  MSE(train) = 1.1416 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.7285 ;  MSE(train) = 0.7285 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0783 ;  MSE(train) = 0.0783 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0777 ;  MSE(train) = 0.0777 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0770 ;  MSE(train) = 0.0770 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0760 ;  MSE(train) = 0.0760 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0747 ;  MSE(train) = 0.0747 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0729 ;  MSE(train) = 0.0729 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0701 ;  MSE(train) = 0.0701 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0657 ;  MSE(train) = 0.0657 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0582 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 1.0960 ;  MSE(train) = 1.0960 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.8075 ;  MSE(train) = 0.8075 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0449 ;  MSE(train) = 0.0449 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0445 ;  MSE(train) = 0.0445 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0441 ;  MSE(train) = 0.0441 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0437 ;  MSE(train) = 0.0437 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0433 ;  MSE(train) = 0.0433 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0428 ;  MSE(train) = 0.0428 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0424 ;  MSE(train) = 0.0424 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0420 ;  MSE(train) = 0.0420 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0416 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 1.0587 ;  MSE(train) = 1.0587 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.7565 ;  MSE(train) = 0.7565 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0290 ;  MSE(train) = 0.0290 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0289 ;  MSE(train) = 0.0289 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0288 ;  MSE(train) = 0.0288 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0287 ;  MSE(train) = 0.0287 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0286 ;  MSE(train) = 0.0286 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0285 ;  MSE(train) = 0.0285 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0284 ;  MSE(train) = 0.0284 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0282 ;  MSE(train) = 0.0282 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0281 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 1.0932 ;  MSE(train) = 1.0932 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.7510 ;  MSE(train) = 0.7510 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0650 ;  MSE(train) = 0.0650 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0596 ;  MSE(train) = 0.0596 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0572 ;  MSE(train) = 0.0572 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0558 ;  MSE(train) = 0.0558 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0548 ;  MSE(train) = 0.0548 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0541 ;  MSE(train) = 0.0541 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0535 ;  MSE(train) = 0.0535 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0529 ;  MSE(train) = 0.0529 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0523 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.9817 ;  MSE(train) = 0.9817 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.7474 ;  MSE(train) = 0.7474 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0352 ;  MSE(train) = 0.0352 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0349 ;  MSE(train) = 0.0349 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0346 ;  MSE(train) = 0.0346 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0343 ;  MSE(train) = 0.0343 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0339 ;  MSE(train) = 0.0339 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0336 ;  MSE(train) = 0.0336 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0332 ;  MSE(train) = 0.0332 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0328 ;  MSE(train) = 0.0328 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0325 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.9441 ;  MSE(train) = 0.9441 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.8249 ;  MSE(train) = 0.8249 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0502 ;  MSE(train) = 0.0502 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0496 ;  MSE(train) = 0.0496 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0489 ;  MSE(train) = 0.0489 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0481 ;  MSE(train) = 0.0481 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0472 ;  MSE(train) = 0.0472 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0462 ;  MSE(train) = 0.0462 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0450 ;  MSE(train) = 0.0450 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0438 ;  MSE(train) = 0.0438 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0424 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.8628 ;  MSE(train) = 0.8628 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.8522 ;  MSE(train) = 0.8522 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0566 ;  MSE(train) = 0.0566 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0555 ;  MSE(train) = 0.0555 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0543 ;  MSE(train) = 0.0543 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0529 ;  MSE(train) = 0.0529 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0514 ;  MSE(train) = 0.0514 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0497 ;  MSE(train) = 0.0497 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0479 ;  MSE(train) = 0.0479 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0459 ;  MSE(train) = 0.0459 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0441 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.9550 ;  MSE(train) = 0.9550 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.7942 ;  MSE(train) = 0.7942 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0422 ;  MSE(train) = 0.0422 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0232 ;  MSE(train) = 0.0232 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0220 ;  MSE(train) = 0.0220 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0214 ;  MSE(train) = 0.0214 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0209 ;  MSE(train) = 0.0209 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0206 ;  MSE(train) = 0.0206 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0204 ;  MSE(train) = 0.0204 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0202 ;  MSE(train) = 0.0202 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0200 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.9508 ;  MSE(train) = 0.9508 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.7061 ;  MSE(train) = 0.7061 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0737 ;  MSE(train) = 0.0737 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0718 ;  MSE(train) = 0.0718 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0697 ;  MSE(train) = 0.0697 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0675 ;  MSE(train) = 0.0675 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0649 ;  MSE(train) = 0.0649 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0620 ;  MSE(train) = 0.0620 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0589 ;  MSE(train) = 0.0589 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0563 ;  MSE(train) = 0.0563 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0547 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.8882 ;  MSE(train) = 0.8882 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.7816 ;  MSE(train) = 0.7816 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0269 ;  MSE(train) = 0.0269 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0227 ;  MSE(train) = 0.0227 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0218 ;  MSE(train) = 0.0218 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0211 ;  MSE(train) = 0.0211 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0206 ;  MSE(train) = 0.0206 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0203 ;  MSE(train) = 0.0203 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0199 ;  MSE(train) = 0.0199 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0196 ;  MSE(train) = 0.0196 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0194 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 1.0045 ;  MSE(train) = 1.0045 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.4783 ;  MSE(train) = 0.4783 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.1172 ;  MSE(train) = 0.1172 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0917 ;  MSE(train) = 0.0917 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0870 ;  MSE(train) = 0.0870 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0844 ;  MSE(train) = 0.0844 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0819 ;  MSE(train) = 0.0819 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0793 ;  MSE(train) = 0.0793 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0765 ;  MSE(train) = 0.0765 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0740 ;  MSE(train) = 0.0740 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0722 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.9045 ;  MSE(train) = 0.9045 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.7256 ;  MSE(train) = 0.7256 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0241 ;  MSE(train) = 0.0241 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0238 ;  MSE(train) = 0.0238 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0235 ;  MSE(train) = 0.0235 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0233 ;  MSE(train) = 0.0233 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0230 ;  MSE(train) = 0.0230 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0227 ;  MSE(train) = 0.0227 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0225 ;  MSE(train) = 0.0225 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0222 ;  MSE(train) = 0.0222 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0220 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.9102 ;  MSE(train) = 0.9102 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.7636 ;  MSE(train) = 0.7636 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0242 ;  MSE(train) = 0.0242 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0241 ;  MSE(train) = 0.0241 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0240 ;  MSE(train) = 0.0240 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0238 ;  MSE(train) = 0.0238 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0237 ;  MSE(train) = 0.0237 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0236 ;  MSE(train) = 0.0236 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0235 ;  MSE(train) = 0.0235 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0234 ;  MSE(train) = 0.0234 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0232 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.9328 ;  MSE(train) = 0.9328 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.8153 ;  MSE(train) = 0.8153 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0817 ;  MSE(train) = 0.0817 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0243 ;  MSE(train) = 0.0243 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0220 ;  MSE(train) = 0.0220 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0206 ;  MSE(train) = 0.0206 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0199 ;  MSE(train) = 0.0199 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0195 ;  MSE(train) = 0.0195 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0193 ;  MSE(train) = 0.0193 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0191 ;  MSE(train) = 0.0191 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0190 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.7896 ;  MSE(train) = 0.7896 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.7637 ;  MSE(train) = 0.7637 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0687 ;  MSE(train) = 0.0687 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0680 ;  MSE(train) = 0.0680 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0672 ;  MSE(train) = 0.0672 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0665 ;  MSE(train) = 0.0665 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0658 ;  MSE(train) = 0.0658 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0651 ;  MSE(train) = 0.0651 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0644 ;  MSE(train) = 0.0644 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0637 ;  MSE(train) = 0.0637 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0629 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.7322 ;  MSE(train) = 0.7322 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.7004 ;  MSE(train) = 0.7004 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0801 ;  MSE(train) = 0.0801 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0791 ;  MSE(train) = 0.0791 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0781 ;  MSE(train) = 0.0781 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0773 ;  MSE(train) = 0.0773 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0766 ;  MSE(train) = 0.0766 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0759 ;  MSE(train) = 0.0759 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0753 ;  MSE(train) = 0.0753 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0748 ;  MSE(train) = 0.0748 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0742 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.7000 ;  MSE(train) = 0.7000 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.8353 ;  MSE(train) = 0.8353 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0247 ;  MSE(train) = 0.0247 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0197 ;  MSE(train) = 0.0197 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0183 ;  MSE(train) = 0.0183 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0176 ;  MSE(train) = 0.0176 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0171 ;  MSE(train) = 0.0171 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0168 ;  MSE(train) = 0.0168 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0166 ;  MSE(train) = 0.0166 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0164 ;  MSE(train) = 0.0164 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0162 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.7863 ;  MSE(train) = 0.7863 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.8131 ;  MSE(train) = 0.8131 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0447 ;  MSE(train) = 0.0447 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0425 ;  MSE(train) = 0.0425 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0418 ;  MSE(train) = 0.0418 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0411 ;  MSE(train) = 0.0411 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0405 ;  MSE(train) = 0.0405 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0400 ;  MSE(train) = 0.0400 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0396 ;  MSE(train) = 0.0396 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0392 ;  MSE(train) = 0.0392 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0388 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.7840 ;  MSE(train) = 0.7840 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.7668 ;  MSE(train) = 0.7668 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0688 ;  MSE(train) = 0.0688 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0667 ;  MSE(train) = 0.0667 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0649 ;  MSE(train) = 0.0649 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0629 ;  MSE(train) = 0.0629 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0560 ;  MSE(train) = 0.0560 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0463 ;  MSE(train) = 0.0463 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0455 ;  MSE(train) = 0.0455 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0448 ;  MSE(train) = 0.0448 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0441 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.7940 ;  MSE(train) = 0.7940 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.8171 ;  MSE(train) = 0.8171 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0231 ;  MSE(train) = 0.0231 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0228 ;  MSE(train) = 0.0228 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0225 ;  MSE(train) = 0.0225 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0222 ;  MSE(train) = 0.0222 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0220 ;  MSE(train) = 0.0220 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0218 ;  MSE(train) = 0.0218 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0216 ;  MSE(train) = 0.0216 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0214 ;  MSE(train) = 0.0214 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0212 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.8096 ;  MSE(train) = 0.8096 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.7630 ;  MSE(train) = 0.7630 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0966 ;  MSE(train) = 0.0966 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0928 ;  MSE(train) = 0.0928 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0879 ;  MSE(train) = 0.0879 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0812 ;  MSE(train) = 0.0812 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0738 ;  MSE(train) = 0.0738 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0694 ;  MSE(train) = 0.0694 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0673 ;  MSE(train) = 0.0673 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0661 ;  MSE(train) = 0.0661 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0652 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.7545 ;  MSE(train) = 0.7545 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.8275 ;  MSE(train) = 0.8275 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0289 ;  MSE(train) = 0.0289 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0281 ;  MSE(train) = 0.0281 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0274 ;  MSE(train) = 0.0274 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0267 ;  MSE(train) = 0.0267 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0261 ;  MSE(train) = 0.0261 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0256 ;  MSE(train) = 0.0256 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0251 ;  MSE(train) = 0.0251 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0246 ;  MSE(train) = 0.0246 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0242 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.7891 ;  MSE(train) = 0.7891 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.8151 ;  MSE(train) = 0.8151 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.1848 ;  MSE(train) = 0.1848 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0576 ;  MSE(train) = 0.0576 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0184 ;  MSE(train) = 0.0184 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0178 ;  MSE(train) = 0.0178 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0174 ;  MSE(train) = 0.0174 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0171 ;  MSE(train) = 0.0171 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0169 ;  MSE(train) = 0.0169 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0167 ;  MSE(train) = 0.0167 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0165 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.7767 ;  MSE(train) = 0.7767 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.7797 ;  MSE(train) = 0.7797 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0370 ;  MSE(train) = 0.0370 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0353 ;  MSE(train) = 0.0353 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0331 ;  MSE(train) = 0.0331 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0322 ;  MSE(train) = 0.0322 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0316 ;  MSE(train) = 0.0316 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0310 ;  MSE(train) = 0.0310 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0304 ;  MSE(train) = 0.0304 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0299 ;  MSE(train) = 0.0299 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0294 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.6862 ;  MSE(train) = 0.6862 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.7291 ;  MSE(train) = 0.7291 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0833 ;  MSE(train) = 0.0833 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0790 ;  MSE(train) = 0.0790 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0756 ;  MSE(train) = 0.0756 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0730 ;  MSE(train) = 0.0730 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0710 ;  MSE(train) = 0.0710 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0694 ;  MSE(train) = 0.0694 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0680 ;  MSE(train) = 0.0680 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0667 ;  MSE(train) = 0.0667 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0657 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.6312 ;  MSE(train) = 0.6312 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.6729 ;  MSE(train) = 0.6729 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0861 ;  MSE(train) = 0.0861 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0816 ;  MSE(train) = 0.0816 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0787 ;  MSE(train) = 0.0787 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0765 ;  MSE(train) = 0.0765 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0740 ;  MSE(train) = 0.0740 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0726 ;  MSE(train) = 0.0726 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0723 ;  MSE(train) = 0.0723 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0718 ;  MSE(train) = 0.0718 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0712 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.6334 ;  MSE(train) = 0.6334 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.6055 ;  MSE(train) = 0.6055 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0840 ;  MSE(train) = 0.0840 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0814 ;  MSE(train) = 0.0814 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0792 ;  MSE(train) = 0.0792 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0774 ;  MSE(train) = 0.0774 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0759 ;  MSE(train) = 0.0759 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0746 ;  MSE(train) = 0.0746 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0735 ;  MSE(train) = 0.0735 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0725 ;  MSE(train) = 0.0725 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0716 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.6741 ;  MSE(train) = 0.6741 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.6494 ;  MSE(train) = 0.6494 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.1034 ;  MSE(train) = 0.1034 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0963 ;  MSE(train) = 0.0963 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0909 ;  MSE(train) = 0.0909 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0863 ;  MSE(train) = 0.0863 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0864 ;  MSE(train) = 0.0864 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0865 ;  MSE(train) = 0.0865 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0861 ;  MSE(train) = 0.0861 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0851 ;  MSE(train) = 0.0851 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0839 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.6341 ;  MSE(train) = 0.6341 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.5050 ;  MSE(train) = 0.5050 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.1474 ;  MSE(train) = 0.1474 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.1273 ;  MSE(train) = 0.1273 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.1217 ;  MSE(train) = 0.1217 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.1181 ;  MSE(train) = 0.1181 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.1146 ;  MSE(train) = 0.1146 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.1113 ;  MSE(train) = 0.1113 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.1082 ;  MSE(train) = 0.1082 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.1054 ;  MSE(train) = 0.1054 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.1027 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.6772 ;  MSE(train) = 0.6772 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.7172 ;  MSE(train) = 0.7172 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0893 ;  MSE(train) = 0.0893 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0792 ;  MSE(train) = 0.0792 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0747 ;  MSE(train) = 0.0747 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0726 ;  MSE(train) = 0.0726 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0710 ;  MSE(train) = 0.0710 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0697 ;  MSE(train) = 0.0697 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0684 ;  MSE(train) = 0.0684 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0672 ;  MSE(train) = 0.0672 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0660 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.6311 ;  MSE(train) = 0.6311 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.7697 ;  MSE(train) = 0.7697 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0515 ;  MSE(train) = 0.0515 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0498 ;  MSE(train) = 0.0498 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0483 ;  MSE(train) = 0.0483 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0470 ;  MSE(train) = 0.0470 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0458 ;  MSE(train) = 0.0458 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0447 ;  MSE(train) = 0.0447 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0436 ;  MSE(train) = 0.0436 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0427 ;  MSE(train) = 0.0427 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0418 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.6948 ;  MSE(train) = 0.6948 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.7909 ;  MSE(train) = 0.7909 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0923 ;  MSE(train) = 0.0923 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0908 ;  MSE(train) = 0.0908 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0897 ;  MSE(train) = 0.0897 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0887 ;  MSE(train) = 0.0887 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0879 ;  MSE(train) = 0.0879 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0871 ;  MSE(train) = 0.0871 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0865 ;  MSE(train) = 0.0865 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0858 ;  MSE(train) = 0.0858 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0853 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.6545 ;  MSE(train) = 0.6545 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.7675 ;  MSE(train) = 0.7675 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0755 ;  MSE(train) = 0.0755 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0723 ;  MSE(train) = 0.0723 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0696 ;  MSE(train) = 0.0696 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0669 ;  MSE(train) = 0.0669 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0638 ;  MSE(train) = 0.0638 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0608 ;  MSE(train) = 0.0608 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0582 ;  MSE(train) = 0.0582 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0558 ;  MSE(train) = 0.0558 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0538 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.5800 ;  MSE(train) = 0.5800 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.5144 ;  MSE(train) = 0.5144 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.1152 ;  MSE(train) = 0.1152 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.1131 ;  MSE(train) = 0.1131 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.1111 ;  MSE(train) = 0.1111 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.1088 ;  MSE(train) = 0.1088 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.1012 ;  MSE(train) = 0.1012 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0873 ;  MSE(train) = 0.0873 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0867 ;  MSE(train) = 0.0867 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0862 ;  MSE(train) = 0.0862 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0857 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.5107 ;  MSE(train) = 0.5107 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.3658 ;  MSE(train) = 0.3658 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0918 ;  MSE(train) = 0.0918 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0879 ;  MSE(train) = 0.0879 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0834 ;  MSE(train) = 0.0834 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0777 ;  MSE(train) = 0.0777 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0741 ;  MSE(train) = 0.0741 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0723 ;  MSE(train) = 0.0723 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0709 ;  MSE(train) = 0.0709 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0698 ;  MSE(train) = 0.0698 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0689 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.5165 ;  MSE(train) = 0.5165 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.5374 ;  MSE(train) = 0.5374 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.1035 ;  MSE(train) = 0.1035 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0858 ;  MSE(train) = 0.0858 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0795 ;  MSE(train) = 0.0795 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0740 ;  MSE(train) = 0.0740 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0695 ;  MSE(train) = 0.0695 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0667 ;  MSE(train) = 0.0667 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0649 ;  MSE(train) = 0.0649 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0635 ;  MSE(train) = 0.0635 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0624 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.5253 ;  MSE(train) = 0.5253 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.3378 ;  MSE(train) = 0.3378 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0694 ;  MSE(train) = 0.0694 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0568 ;  MSE(train) = 0.0568 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0506 ;  MSE(train) = 0.0506 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0478 ;  MSE(train) = 0.0478 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0451 ;  MSE(train) = 0.0451 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0420 ;  MSE(train) = 0.0420 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0388 ;  MSE(train) = 0.0388 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0363 ;  MSE(train) = 0.0363 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0346 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.4959 ;  MSE(train) = 0.4959 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.5647 ;  MSE(train) = 0.5647 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.1006 ;  MSE(train) = 0.1006 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0930 ;  MSE(train) = 0.0930 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0869 ;  MSE(train) = 0.0869 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0813 ;  MSE(train) = 0.0813 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0755 ;  MSE(train) = 0.0755 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0727 ;  MSE(train) = 0.0727 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0706 ;  MSE(train) = 0.0706 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0688 ;  MSE(train) = 0.0688 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0670 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.5047 ;  MSE(train) = 0.5047 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.4086 ;  MSE(train) = 0.4086 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.1060 ;  MSE(train) = 0.1060 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0981 ;  MSE(train) = 0.0981 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0941 ;  MSE(train) = 0.0941 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0916 ;  MSE(train) = 0.0916 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0895 ;  MSE(train) = 0.0895 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0878 ;  MSE(train) = 0.0878 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0861 ;  MSE(train) = 0.0861 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0845 ;  MSE(train) = 0.0845 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0830 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.5456 ;  MSE(train) = 0.5456 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.7185 ;  MSE(train) = 0.7185 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0991 ;  MSE(train) = 0.0991 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0937 ;  MSE(train) = 0.0937 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0894 ;  MSE(train) = 0.0894 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0856 ;  MSE(train) = 0.0856 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0821 ;  MSE(train) = 0.0821 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0791 ;  MSE(train) = 0.0791 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0766 ;  MSE(train) = 0.0766 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0743 ;  MSE(train) = 0.0743 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0724 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.5291 ;  MSE(train) = 0.5291 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.7139 ;  MSE(train) = 0.7139 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0855 ;  MSE(train) = 0.0855 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0745 ;  MSE(train) = 0.0745 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0720 ;  MSE(train) = 0.0720 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0700 ;  MSE(train) = 0.0700 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0683 ;  MSE(train) = 0.0683 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0670 ;  MSE(train) = 0.0670 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0660 ;  MSE(train) = 0.0660 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0651 ;  MSE(train) = 0.0651 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0643 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.5033 ;  MSE(train) = 0.5033 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.5421 ;  MSE(train) = 0.5421 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.1213 ;  MSE(train) = 0.1213 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.1055 ;  MSE(train) = 0.1055 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0987 ;  MSE(train) = 0.0987 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0957 ;  MSE(train) = 0.0957 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0938 ;  MSE(train) = 0.0938 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0923 ;  MSE(train) = 0.0923 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0911 ;  MSE(train) = 0.0911 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0901 ;  MSE(train) = 0.0901 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0892 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.5104 ;  MSE(train) = 0.5104 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.5050 ;  MSE(train) = 0.5050 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.1075 ;  MSE(train) = 0.1075 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.1020 ;  MSE(train) = 0.1020 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0982 ;  MSE(train) = 0.0982 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0948 ;  MSE(train) = 0.0948 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0924 ;  MSE(train) = 0.0924 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0905 ;  MSE(train) = 0.0905 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0888 ;  MSE(train) = 0.0888 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0872 ;  MSE(train) = 0.0872 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0857 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.5404 ;  MSE(train) = 0.5404 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.4635 ;  MSE(train) = 0.4635 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.1481 ;  MSE(train) = 0.1481 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.1378 ;  MSE(train) = 0.1378 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.1314 ;  MSE(train) = 0.1314 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.1242 ;  MSE(train) = 0.1242 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.1170 ;  MSE(train) = 0.1170 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.1163 ;  MSE(train) = 0.1163 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.1157 ;  MSE(train) = 0.1157 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.1150 ;  MSE(train) = 0.1150 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.1144 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.3691 ;  MSE(train) = 0.3691 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.2324 ;  MSE(train) = 0.2324 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0537 ;  MSE(train) = 0.0537 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0452 ;  MSE(train) = 0.0452 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0415 ;  MSE(train) = 0.0415 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0391 ;  MSE(train) = 0.0391 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0372 ;  MSE(train) = 0.0372 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0356 ;  MSE(train) = 0.0356 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0344 ;  MSE(train) = 0.0344 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0333 ;  MSE(train) = 0.0333 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0324 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.4278 ;  MSE(train) = 0.4278 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.5043 ;  MSE(train) = 0.5043 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.1131 ;  MSE(train) = 0.1131 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0739 ;  MSE(train) = 0.0739 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0601 ;  MSE(train) = 0.0601 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0549 ;  MSE(train) = 0.0549 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0514 ;  MSE(train) = 0.0514 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0499 ;  MSE(train) = 0.0499 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0483 ;  MSE(train) = 0.0483 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0472 ;  MSE(train) = 0.0472 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0464 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.4391 ;  MSE(train) = 0.4391 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.4683 ;  MSE(train) = 0.4683 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.1002 ;  MSE(train) = 0.1002 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0809 ;  MSE(train) = 0.0809 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0748 ;  MSE(train) = 0.0748 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0709 ;  MSE(train) = 0.0709 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0683 ;  MSE(train) = 0.0683 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0663 ;  MSE(train) = 0.0663 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0646 ;  MSE(train) = 0.0646 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0633 ;  MSE(train) = 0.0633 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0621 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.4216 ;  MSE(train) = 0.4216 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.5286 ;  MSE(train) = 0.5286 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0888 ;  MSE(train) = 0.0888 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0783 ;  MSE(train) = 0.0783 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0735 ;  MSE(train) = 0.0735 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0704 ;  MSE(train) = 0.0704 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0679 ;  MSE(train) = 0.0679 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0659 ;  MSE(train) = 0.0659 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0642 ;  MSE(train) = 0.0642 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0629 ;  MSE(train) = 0.0629 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0617 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.3946 ;  MSE(train) = 0.3946 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.4294 ;  MSE(train) = 0.4294 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.1107 ;  MSE(train) = 0.1107 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0863 ;  MSE(train) = 0.0863 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0827 ;  MSE(train) = 0.0827 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0799 ;  MSE(train) = 0.0799 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0777 ;  MSE(train) = 0.0777 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0759 ;  MSE(train) = 0.0759 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0743 ;  MSE(train) = 0.0743 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0729 ;  MSE(train) = 0.0729 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0716 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.4012 ;  MSE(train) = 0.4012 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.3085 ;  MSE(train) = 0.3085 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0707 ;  MSE(train) = 0.0707 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0514 ;  MSE(train) = 0.0514 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0470 ;  MSE(train) = 0.0470 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0448 ;  MSE(train) = 0.0448 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0431 ;  MSE(train) = 0.0431 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0417 ;  MSE(train) = 0.0417 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0406 ;  MSE(train) = 0.0406 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0397 ;  MSE(train) = 0.0397 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0389 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.3859 ;  MSE(train) = 0.3859 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.3007 ;  MSE(train) = 0.3007 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0788 ;  MSE(train) = 0.0788 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0658 ;  MSE(train) = 0.0658 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0605 ;  MSE(train) = 0.0605 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0572 ;  MSE(train) = 0.0572 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0547 ;  MSE(train) = 0.0547 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0524 ;  MSE(train) = 0.0524 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0499 ;  MSE(train) = 0.0499 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0457 ;  MSE(train) = 0.0457 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0389 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.3973 ;  MSE(train) = 0.3973 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.3877 ;  MSE(train) = 0.3877 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0737 ;  MSE(train) = 0.0737 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0615 ;  MSE(train) = 0.0615 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0564 ;  MSE(train) = 0.0564 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0534 ;  MSE(train) = 0.0534 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0513 ;  MSE(train) = 0.0513 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0497 ;  MSE(train) = 0.0497 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0484 ;  MSE(train) = 0.0484 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0473 ;  MSE(train) = 0.0473 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0463 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.4462 ;  MSE(train) = 0.4462 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.3972 ;  MSE(train) = 0.3972 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0749 ;  MSE(train) = 0.0749 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0621 ;  MSE(train) = 0.0621 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0560 ;  MSE(train) = 0.0560 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0519 ;  MSE(train) = 0.0519 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0488 ;  MSE(train) = 0.0488 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0462 ;  MSE(train) = 0.0462 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0440 ;  MSE(train) = 0.0440 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0421 ;  MSE(train) = 0.0421 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0403 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.4163 ;  MSE(train) = 0.4163 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.5112 ;  MSE(train) = 0.5112 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0911 ;  MSE(train) = 0.0911 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0665 ;  MSE(train) = 0.0665 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0623 ;  MSE(train) = 0.0623 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0592 ;  MSE(train) = 0.0592 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0570 ;  MSE(train) = 0.0570 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0555 ;  MSE(train) = 0.0555 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0543 ;  MSE(train) = 0.0543 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0533 ;  MSE(train) = 0.0533 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0524 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.3430 ;  MSE(train) = 0.3430 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.4678 ;  MSE(train) = 0.4678 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.1043 ;  MSE(train) = 0.1043 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0662 ;  MSE(train) = 0.0662 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0581 ;  MSE(train) = 0.0581 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0530 ;  MSE(train) = 0.0530 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0499 ;  MSE(train) = 0.0499 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0480 ;  MSE(train) = 0.0480 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0467 ;  MSE(train) = 0.0467 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0456 ;  MSE(train) = 0.0456 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0447 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.3165 ;  MSE(train) = 0.3165 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.4558 ;  MSE(train) = 0.4558 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.1067 ;  MSE(train) = 0.1067 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0760 ;  MSE(train) = 0.0760 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0647 ;  MSE(train) = 0.0647 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0616 ;  MSE(train) = 0.0616 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0540 ;  MSE(train) = 0.0540 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0453 ;  MSE(train) = 0.0453 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0448 ;  MSE(train) = 0.0448 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0444 ;  MSE(train) = 0.0444 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0440 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.3375 ;  MSE(train) = 0.3375 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.4481 ;  MSE(train) = 0.4481 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.1172 ;  MSE(train) = 0.1172 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0865 ;  MSE(train) = 0.0865 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0810 ;  MSE(train) = 0.0810 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0720 ;  MSE(train) = 0.0720 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0670 ;  MSE(train) = 0.0670 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0571 ;  MSE(train) = 0.0571 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0481 ;  MSE(train) = 0.0481 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0467 ;  MSE(train) = 0.0467 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0457 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.3315 ;  MSE(train) = 0.3315 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.3795 ;  MSE(train) = 0.3795 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.1010 ;  MSE(train) = 0.1010 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0585 ;  MSE(train) = 0.0585 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0478 ;  MSE(train) = 0.0478 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0454 ;  MSE(train) = 0.0454 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0432 ;  MSE(train) = 0.0432 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0415 ;  MSE(train) = 0.0415 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0404 ;  MSE(train) = 0.0404 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0396 ;  MSE(train) = 0.0396 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0389 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.3328 ;  MSE(train) = 0.3328 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.4679 ;  MSE(train) = 0.4679 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.1066 ;  MSE(train) = 0.1066 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0657 ;  MSE(train) = 0.0657 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0553 ;  MSE(train) = 0.0553 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0511 ;  MSE(train) = 0.0511 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0484 ;  MSE(train) = 0.0484 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0465 ;  MSE(train) = 0.0465 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0450 ;  MSE(train) = 0.0450 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0414 ;  MSE(train) = 0.0414 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0407 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.3438 ;  MSE(train) = 0.3438 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.4886 ;  MSE(train) = 0.4886 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.1131 ;  MSE(train) = 0.1131 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0877 ;  MSE(train) = 0.0877 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0789 ;  MSE(train) = 0.0789 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0699 ;  MSE(train) = 0.0699 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0644 ;  MSE(train) = 0.0644 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0586 ;  MSE(train) = 0.0586 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0530 ;  MSE(train) = 0.0530 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0495 ;  MSE(train) = 0.0495 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0475 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.3231 ;  MSE(train) = 0.3231 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.3328 ;  MSE(train) = 0.3328 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0847 ;  MSE(train) = 0.0847 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0555 ;  MSE(train) = 0.0555 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0428 ;  MSE(train) = 0.0428 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0388 ;  MSE(train) = 0.0388 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0376 ;  MSE(train) = 0.0376 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0366 ;  MSE(train) = 0.0366 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0358 ;  MSE(train) = 0.0358 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0352 ;  MSE(train) = 0.0352 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0346 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.3044 ;  MSE(train) = 0.3044 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.2702 ;  MSE(train) = 0.2702 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0725 ;  MSE(train) = 0.0725 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0448 ;  MSE(train) = 0.0448 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0382 ;  MSE(train) = 0.0382 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0345 ;  MSE(train) = 0.0345 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0320 ;  MSE(train) = 0.0320 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0301 ;  MSE(train) = 0.0301 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0286 ;  MSE(train) = 0.0286 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0273 ;  MSE(train) = 0.0273 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0262 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.3680 ;  MSE(train) = 0.3680 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.3929 ;  MSE(train) = 0.3929 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0953 ;  MSE(train) = 0.0953 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0608 ;  MSE(train) = 0.0608 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0539 ;  MSE(train) = 0.0539 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0497 ;  MSE(train) = 0.0497 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0467 ;  MSE(train) = 0.0467 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0434 ;  MSE(train) = 0.0434 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0318 ;  MSE(train) = 0.0318 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0283 ;  MSE(train) = 0.0283 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0273 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.3495 ;  MSE(train) = 0.3495 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.3424 ;  MSE(train) = 0.3424 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0964 ;  MSE(train) = 0.0964 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0632 ;  MSE(train) = 0.0632 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0573 ;  MSE(train) = 0.0573 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0540 ;  MSE(train) = 0.0540 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0517 ;  MSE(train) = 0.0517 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0500 ;  MSE(train) = 0.0500 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0486 ;  MSE(train) = 0.0486 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0474 ;  MSE(train) = 0.0474 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0462 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.2840 ;  MSE(train) = 0.2840 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.1869 ;  MSE(train) = 0.1869 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0719 ;  MSE(train) = 0.0719 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0214 ;  MSE(train) = 0.0214 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0189 ;  MSE(train) = 0.0189 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0172 ;  MSE(train) = 0.0172 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0160 ;  MSE(train) = 0.0160 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0151 ;  MSE(train) = 0.0151 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0144 ;  MSE(train) = 0.0144 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0138 ;  MSE(train) = 0.0138 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0133 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.2776 ;  MSE(train) = 0.2776 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.2400 ;  MSE(train) = 0.2400 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0783 ;  MSE(train) = 0.0783 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0262 ;  MSE(train) = 0.0262 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0243 ;  MSE(train) = 0.0243 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0231 ;  MSE(train) = 0.0231 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0221 ;  MSE(train) = 0.0221 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0214 ;  MSE(train) = 0.0214 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0208 ;  MSE(train) = 0.0208 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0202 ;  MSE(train) = 0.0202 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0198 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.2663 ;  MSE(train) = 0.2663 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.2324 ;  MSE(train) = 0.2324 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0759 ;  MSE(train) = 0.0759 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0253 ;  MSE(train) = 0.0253 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0220 ;  MSE(train) = 0.0220 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0203 ;  MSE(train) = 0.0203 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0193 ;  MSE(train) = 0.0193 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0185 ;  MSE(train) = 0.0185 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0179 ;  MSE(train) = 0.0179 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0174 ;  MSE(train) = 0.0174 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0169 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.2653 ;  MSE(train) = 0.2653 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.2365 ;  MSE(train) = 0.2365 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0943 ;  MSE(train) = 0.0943 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0222 ;  MSE(train) = 0.0222 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0204 ;  MSE(train) = 0.0204 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0193 ;  MSE(train) = 0.0193 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0185 ;  MSE(train) = 0.0185 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0179 ;  MSE(train) = 0.0179 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0174 ;  MSE(train) = 0.0174 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0170 ;  MSE(train) = 0.0170 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0166 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.2701 ;  MSE(train) = 0.2701 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.1848 ;  MSE(train) = 0.1848 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0716 ;  MSE(train) = 0.0716 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0195 ;  MSE(train) = 0.0195 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0179 ;  MSE(train) = 0.0179 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0167 ;  MSE(train) = 0.0167 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0158 ;  MSE(train) = 0.0158 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0151 ;  MSE(train) = 0.0151 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0145 ;  MSE(train) = 0.0145 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0140 ;  MSE(train) = 0.0140 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0136 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.2646 ;  MSE(train) = 0.2646 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.2043 ;  MSE(train) = 0.2043 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0823 ;  MSE(train) = 0.0823 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0250 ;  MSE(train) = 0.0250 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0230 ;  MSE(train) = 0.0230 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0216 ;  MSE(train) = 0.0216 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0207 ;  MSE(train) = 0.0207 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0199 ;  MSE(train) = 0.0199 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0193 ;  MSE(train) = 0.0193 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0188 ;  MSE(train) = 0.0188 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0184 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.2621 ;  MSE(train) = 0.2621 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.2384 ;  MSE(train) = 0.2384 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0706 ;  MSE(train) = 0.0706 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0283 ;  MSE(train) = 0.0283 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0253 ;  MSE(train) = 0.0253 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0236 ;  MSE(train) = 0.0236 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0223 ;  MSE(train) = 0.0223 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0213 ;  MSE(train) = 0.0213 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0205 ;  MSE(train) = 0.0205 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0198 ;  MSE(train) = 0.0198 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0191 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.2541 ;  MSE(train) = 0.2541 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.2644 ;  MSE(train) = 0.2644 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0889 ;  MSE(train) = 0.0889 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0311 ;  MSE(train) = 0.0311 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0290 ;  MSE(train) = 0.0290 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0274 ;  MSE(train) = 0.0274 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0261 ;  MSE(train) = 0.0261 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0251 ;  MSE(train) = 0.0251 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0243 ;  MSE(train) = 0.0243 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0236 ;  MSE(train) = 0.0236 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0230 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.2729 ;  MSE(train) = 0.2729 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.2836 ;  MSE(train) = 0.2836 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.1047 ;  MSE(train) = 0.1047 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0313 ;  MSE(train) = 0.0313 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0242 ;  MSE(train) = 0.0242 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0230 ;  MSE(train) = 0.0230 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0221 ;  MSE(train) = 0.0221 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0214 ;  MSE(train) = 0.0214 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0208 ;  MSE(train) = 0.0208 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0203 ;  MSE(train) = 0.0203 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0198 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.2748 ;  MSE(train) = 0.2748 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.2386 ;  MSE(train) = 0.2386 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0881 ;  MSE(train) = 0.0881 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0274 ;  MSE(train) = 0.0274 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0244 ;  MSE(train) = 0.0244 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0221 ;  MSE(train) = 0.0221 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0201 ;  MSE(train) = 0.0201 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0190 ;  MSE(train) = 0.0190 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0182 ;  MSE(train) = 0.0182 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0176 ;  MSE(train) = 0.0176 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0171 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.1921 ;  MSE(train) = 0.1921 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.1699 ;  MSE(train) = 0.1699 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0934 ;  MSE(train) = 0.0934 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0121 ;  MSE(train) = 0.0121 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0118 ;  MSE(train) = 0.0118 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0116 ;  MSE(train) = 0.0116 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0113 ;  MSE(train) = 0.0113 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0111 ;  MSE(train) = 0.0111 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0109 ;  MSE(train) = 0.0109 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0108 ;  MSE(train) = 0.0108 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0106 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.2009 ;  MSE(train) = 0.2009 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.1974 ;  MSE(train) = 0.1974 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0847 ;  MSE(train) = 0.0847 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0142 ;  MSE(train) = 0.0142 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0134 ;  MSE(train) = 0.0134 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0128 ;  MSE(train) = 0.0128 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0123 ;  MSE(train) = 0.0123 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0119 ;  MSE(train) = 0.0119 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0115 ;  MSE(train) = 0.0115 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0111 ;  MSE(train) = 0.0111 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0108 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.2182 ;  MSE(train) = 0.2182 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.1965 ;  MSE(train) = 0.1965 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0937 ;  MSE(train) = 0.0937 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0122 ;  MSE(train) = 0.0122 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0116 ;  MSE(train) = 0.0116 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0110 ;  MSE(train) = 0.0110 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0106 ;  MSE(train) = 0.0106 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0102 ;  MSE(train) = 0.0102 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0099 ;  MSE(train) = 0.0099 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0097 ;  MSE(train) = 0.0097 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0094 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.2088 ;  MSE(train) = 0.2088 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.2413 ;  MSE(train) = 0.2413 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0925 ;  MSE(train) = 0.0925 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0165 ;  MSE(train) = 0.0165 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0140 ;  MSE(train) = 0.0140 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0128 ;  MSE(train) = 0.0128 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0121 ;  MSE(train) = 0.0121 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0116 ;  MSE(train) = 0.0116 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0112 ;  MSE(train) = 0.0112 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0109 ;  MSE(train) = 0.0109 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0107 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.2059 ;  MSE(train) = 0.2059 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.1643 ;  MSE(train) = 0.1643 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0835 ;  MSE(train) = 0.0835 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0104 ;  MSE(train) = 0.0104 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0098 ;  MSE(train) = 0.0098 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0093 ;  MSE(train) = 0.0093 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0089 ;  MSE(train) = 0.0089 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0086 ;  MSE(train) = 0.0086 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0083 ;  MSE(train) = 0.0083 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0080 ;  MSE(train) = 0.0080 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0078 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.1800 ;  MSE(train) = 0.1800 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.1633 ;  MSE(train) = 0.1633 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0827 ;  MSE(train) = 0.0827 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0096 ;  MSE(train) = 0.0096 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0093 ;  MSE(train) = 0.0093 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0091 ;  MSE(train) = 0.0091 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0088 ;  MSE(train) = 0.0088 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0086 ;  MSE(train) = 0.0086 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0085 ;  MSE(train) = 0.0085 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0083 ;  MSE(train) = 0.0083 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0082 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.2026 ;  MSE(train) = 0.2026 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.1577 ;  MSE(train) = 0.1577 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0905 ;  MSE(train) = 0.0905 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0100 ;  MSE(train) = 0.0100 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0096 ;  MSE(train) = 0.0096 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0092 ;  MSE(train) = 0.0092 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0089 ;  MSE(train) = 0.0089 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0086 ;  MSE(train) = 0.0086 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0083 ;  MSE(train) = 0.0083 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0081 ;  MSE(train) = 0.0081 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0079 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.2090 ;  MSE(train) = 0.2090 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.1591 ;  MSE(train) = 0.1591 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0938 ;  MSE(train) = 0.0938 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0091 ;  MSE(train) = 0.0091 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0087 ;  MSE(train) = 0.0087 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0083 ;  MSE(train) = 0.0083 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0080 ;  MSE(train) = 0.0080 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0078 ;  MSE(train) = 0.0078 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0075 ;  MSE(train) = 0.0075 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0073 ;  MSE(train) = 0.0073 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0071 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.2194 ;  MSE(train) = 0.2194 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.2078 ;  MSE(train) = 0.2078 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0926 ;  MSE(train) = 0.0926 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0107 ;  MSE(train) = 0.0107 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0102 ;  MSE(train) = 0.0102 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0099 ;  MSE(train) = 0.0099 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0096 ;  MSE(train) = 0.0096 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0093 ;  MSE(train) = 0.0093 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0091 ;  MSE(train) = 0.0091 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0089 ;  MSE(train) = 0.0089 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0087 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.2068 ;  MSE(train) = 0.2068 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.1570 ;  MSE(train) = 0.1570 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0926 ;  MSE(train) = 0.0926 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0102 ;  MSE(train) = 0.0102 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0097 ;  MSE(train) = 0.0097 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0093 ;  MSE(train) = 0.0093 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0090 ;  MSE(train) = 0.0090 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0087 ;  MSE(train) = 0.0087 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0084 ;  MSE(train) = 0.0084 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0082 ;  MSE(train) = 0.0082 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0080 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.1706 ;  MSE(train) = 0.1706 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.1281 ;  MSE(train) = 0.1281 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0997 ;  MSE(train) = 0.0997 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0066 ;  MSE(train) = 0.0066 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0064 ;  MSE(train) = 0.0064 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0063 ;  MSE(train) = 0.0063 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0061 ;  MSE(train) = 0.0061 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0060 ;  MSE(train) = 0.0060 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0059 ;  MSE(train) = 0.0059 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0057 ;  MSE(train) = 0.0057 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0056 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.1598 ;  MSE(train) = 0.1598 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.1496 ;  MSE(train) = 0.1496 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0936 ;  MSE(train) = 0.0936 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0081 ;  MSE(train) = 0.0081 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0079 ;  MSE(train) = 0.0079 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0078 ;  MSE(train) = 0.0078 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0076 ;  MSE(train) = 0.0076 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0075 ;  MSE(train) = 0.0075 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0074 ;  MSE(train) = 0.0074 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0073 ;  MSE(train) = 0.0073 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0072 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.1426 ;  MSE(train) = 0.1426 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.1114 ;  MSE(train) = 0.1114 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0933 ;  MSE(train) = 0.0933 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0059 ;  MSE(train) = 0.0059 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0058 ;  MSE(train) = 0.0058 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0057 ;  MSE(train) = 0.0057 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0056 ;  MSE(train) = 0.0056 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0055 ;  MSE(train) = 0.0055 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0054 ;  MSE(train) = 0.0054 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0054 ;  MSE(train) = 0.0054 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0053 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.1538 ;  MSE(train) = 0.1538 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.1084 ;  MSE(train) = 0.1084 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0908 ;  MSE(train) = 0.0908 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0063 ;  MSE(train) = 0.0063 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0061 ;  MSE(train) = 0.0061 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0059 ;  MSE(train) = 0.0059 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0058 ;  MSE(train) = 0.0058 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0056 ;  MSE(train) = 0.0056 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0055 ;  MSE(train) = 0.0055 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0054 ;  MSE(train) = 0.0054 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0053 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.1652 ;  MSE(train) = 0.1652 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.1749 ;  MSE(train) = 0.1749 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.1019 ;  MSE(train) = 0.1019 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0052 ;  MSE(train) = 0.0052 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0051 ;  MSE(train) = 0.0051 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0050 ;  MSE(train) = 0.0050 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0049 ;  MSE(train) = 0.0049 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0048 ;  MSE(train) = 0.0048 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0047 ;  MSE(train) = 0.0047 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0047 ;  MSE(train) = 0.0047 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0046 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.1711 ;  MSE(train) = 0.1711 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.1476 ;  MSE(train) = 0.1476 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0923 ;  MSE(train) = 0.0923 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0065 ;  MSE(train) = 0.0065 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0063 ;  MSE(train) = 0.0063 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0062 ;  MSE(train) = 0.0062 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0060 ;  MSE(train) = 0.0060 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0059 ;  MSE(train) = 0.0059 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0057 ;  MSE(train) = 0.0057 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0056 ;  MSE(train) = 0.0056 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0055 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.1595 ;  MSE(train) = 0.1595 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.1113 ;  MSE(train) = 0.1113 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0984 ;  MSE(train) = 0.0984 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0060 ;  MSE(train) = 0.0060 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0058 ;  MSE(train) = 0.0058 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0056 ;  MSE(train) = 0.0056 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0055 ;  MSE(train) = 0.0055 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0053 ;  MSE(train) = 0.0053 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0052 ;  MSE(train) = 0.0052 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0051 ;  MSE(train) = 0.0051 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0050 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.1667 ;  MSE(train) = 0.1667 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.1223 ;  MSE(train) = 0.1223 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0886 ;  MSE(train) = 0.0886 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0066 ;  MSE(train) = 0.0066 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0063 ;  MSE(train) = 0.0063 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0060 ;  MSE(train) = 0.0060 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0058 ;  MSE(train) = 0.0058 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0055 ;  MSE(train) = 0.0055 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0054 ;  MSE(train) = 0.0054 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0052 ;  MSE(train) = 0.0052 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0051 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.1544 ;  MSE(train) = 0.1544 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.1040 ;  MSE(train) = 0.1040 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0910 ;  MSE(train) = 0.0910 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0064 ;  MSE(train) = 0.0064 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0062 ;  MSE(train) = 0.0062 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0060 ;  MSE(train) = 0.0060 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0058 ;  MSE(train) = 0.0058 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0057 ;  MSE(train) = 0.0057 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0055 ;  MSE(train) = 0.0055 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0054 ;  MSE(train) = 0.0054 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0053 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.1546 ;  MSE(train) = 0.1546 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.1418 ;  MSE(train) = 0.1418 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0951 ;  MSE(train) = 0.0951 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0054 ;  MSE(train) = 0.0054 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0053 ;  MSE(train) = 0.0053 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0052 ;  MSE(train) = 0.0052 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0051 ;  MSE(train) = 0.0051 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0050 ;  MSE(train) = 0.0050 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0049 ;  MSE(train) = 0.0049 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0048 ;  MSE(train) = 0.0048 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0048 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.1153 ;  MSE(train) = 0.1153 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0827 ;  MSE(train) = 0.0827 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0883 ;  MSE(train) = 0.0883 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0046 ;  MSE(train) = 0.0046 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0044 ;  MSE(train) = 0.0044 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0043 ;  MSE(train) = 0.0043 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0042 ;  MSE(train) = 0.0042 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0041 ;  MSE(train) = 0.0041 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0041 ;  MSE(train) = 0.0041 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0040 ;  MSE(train) = 0.0040 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0039 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.1209 ;  MSE(train) = 0.1209 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0928 ;  MSE(train) = 0.0928 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0912 ;  MSE(train) = 0.0912 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0050 ;  MSE(train) = 0.0050 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0048 ;  MSE(train) = 0.0048 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0047 ;  MSE(train) = 0.0047 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0046 ;  MSE(train) = 0.0046 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0045 ;  MSE(train) = 0.0045 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0044 ;  MSE(train) = 0.0044 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0043 ;  MSE(train) = 0.0043 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0043 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.1257 ;  MSE(train) = 0.1257 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.1044 ;  MSE(train) = 0.1044 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0928 ;  MSE(train) = 0.0928 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0052 ;  MSE(train) = 0.0052 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0051 ;  MSE(train) = 0.0051 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0049 ;  MSE(train) = 0.0049 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0048 ;  MSE(train) = 0.0048 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0047 ;  MSE(train) = 0.0047 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0046 ;  MSE(train) = 0.0046 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0045 ;  MSE(train) = 0.0045 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0044 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.1217 ;  MSE(train) = 0.1217 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.1006 ;  MSE(train) = 0.1006 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0927 ;  MSE(train) = 0.0927 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0047 ;  MSE(train) = 0.0047 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0046 ;  MSE(train) = 0.0046 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0045 ;  MSE(train) = 0.0045 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0044 ;  MSE(train) = 0.0044 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0043 ;  MSE(train) = 0.0043 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0042 ;  MSE(train) = 0.0042 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0042 ;  MSE(train) = 0.0042 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0041 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.1283 ;  MSE(train) = 0.1283 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.1137 ;  MSE(train) = 0.1137 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0918 ;  MSE(train) = 0.0918 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0045 ;  MSE(train) = 0.0045 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0043 ;  MSE(train) = 0.0043 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0042 ;  MSE(train) = 0.0042 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0041 ;  MSE(train) = 0.0041 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0041 ;  MSE(train) = 0.0041 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0040 ;  MSE(train) = 0.0040 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0039 ;  MSE(train) = 0.0039 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0038 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.1212 ;  MSE(train) = 0.1212 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.1304 ;  MSE(train) = 0.1304 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0972 ;  MSE(train) = 0.0972 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0037 ;  MSE(train) = 0.0037 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0037 ;  MSE(train) = 0.0037 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0036 ;  MSE(train) = 0.0036 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0036 ;  MSE(train) = 0.0036 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0035 ;  MSE(train) = 0.0035 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0035 ;  MSE(train) = 0.0035 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0034 ;  MSE(train) = 0.0034 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0034 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.1230 ;  MSE(train) = 0.1230 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.1025 ;  MSE(train) = 0.1025 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.1071 ;  MSE(train) = 0.1071 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0043 ;  MSE(train) = 0.0043 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0043 ;  MSE(train) = 0.0043 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0042 ;  MSE(train) = 0.0042 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0041 ;  MSE(train) = 0.0041 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0041 ;  MSE(train) = 0.0041 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0040 ;  MSE(train) = 0.0040 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0039 ;  MSE(train) = 0.0039 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0039 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.1201 ;  MSE(train) = 0.1201 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0880 ;  MSE(train) = 0.0880 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0991 ;  MSE(train) = 0.0991 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0049 ;  MSE(train) = 0.0049 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0048 ;  MSE(train) = 0.0048 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0047 ;  MSE(train) = 0.0047 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0046 ;  MSE(train) = 0.0046 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0045 ;  MSE(train) = 0.0045 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0044 ;  MSE(train) = 0.0044 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0044 ;  MSE(train) = 0.0044 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0043 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.1244 ;  MSE(train) = 0.1244 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0915 ;  MSE(train) = 0.0915 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0901 ;  MSE(train) = 0.0901 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0055 ;  MSE(train) = 0.0055 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0053 ;  MSE(train) = 0.0053 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0051 ;  MSE(train) = 0.0051 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0050 ;  MSE(train) = 0.0050 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0049 ;  MSE(train) = 0.0049 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0047 ;  MSE(train) = 0.0047 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0046 ;  MSE(train) = 0.0046 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0045 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.1239 ;  MSE(train) = 0.1239 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0952 ;  MSE(train) = 0.0952 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0891 ;  MSE(train) = 0.0891 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0045 ;  MSE(train) = 0.0045 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0044 ;  MSE(train) = 0.0044 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0042 ;  MSE(train) = 0.0042 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0041 ;  MSE(train) = 0.0041 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0040 ;  MSE(train) = 0.0040 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0039 ;  MSE(train) = 0.0039 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0039 ;  MSE(train) = 0.0039 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0038 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0916 ;  MSE(train) = 0.0916 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0674 ;  MSE(train) = 0.0674 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0818 ;  MSE(train) = 0.0818 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0056 ;  MSE(train) = 0.0056 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0053 ;  MSE(train) = 0.0053 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0050 ;  MSE(train) = 0.0050 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0049 ;  MSE(train) = 0.0049 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0047 ;  MSE(train) = 0.0047 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0046 ;  MSE(train) = 0.0046 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0045 ;  MSE(train) = 0.0045 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0044 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0913 ;  MSE(train) = 0.0913 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0716 ;  MSE(train) = 0.0716 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0780 ;  MSE(train) = 0.0780 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0042 ;  MSE(train) = 0.0042 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0040 ;  MSE(train) = 0.0040 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0039 ;  MSE(train) = 0.0039 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0038 ;  MSE(train) = 0.0038 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0036 ;  MSE(train) = 0.0036 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0036 ;  MSE(train) = 0.0036 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0035 ;  MSE(train) = 0.0035 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0034 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0944 ;  MSE(train) = 0.0944 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0787 ;  MSE(train) = 0.0787 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0791 ;  MSE(train) = 0.0791 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0041 ;  MSE(train) = 0.0041 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0040 ;  MSE(train) = 0.0040 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0039 ;  MSE(train) = 0.0039 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0038 ;  MSE(train) = 0.0038 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0037 ;  MSE(train) = 0.0037 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0036 ;  MSE(train) = 0.0036 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0035 ;  MSE(train) = 0.0035 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0035 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0940 ;  MSE(train) = 0.0940 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0749 ;  MSE(train) = 0.0749 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0855 ;  MSE(train) = 0.0855 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0049 ;  MSE(train) = 0.0049 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0046 ;  MSE(train) = 0.0046 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0044 ;  MSE(train) = 0.0044 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0042 ;  MSE(train) = 0.0042 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0041 ;  MSE(train) = 0.0041 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0040 ;  MSE(train) = 0.0040 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0039 ;  MSE(train) = 0.0039 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0039 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0870 ;  MSE(train) = 0.0870 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0647 ;  MSE(train) = 0.0647 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0838 ;  MSE(train) = 0.0838 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0039 ;  MSE(train) = 0.0039 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0036 ;  MSE(train) = 0.0036 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0035 ;  MSE(train) = 0.0035 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0033 ;  MSE(train) = 0.0033 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0032 ;  MSE(train) = 0.0032 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0031 ;  MSE(train) = 0.0031 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0031 ;  MSE(train) = 0.0031 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0030 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0926 ;  MSE(train) = 0.0926 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0747 ;  MSE(train) = 0.0747 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0840 ;  MSE(train) = 0.0840 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0043 ;  MSE(train) = 0.0043 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0041 ;  MSE(train) = 0.0041 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0039 ;  MSE(train) = 0.0039 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0038 ;  MSE(train) = 0.0038 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0037 ;  MSE(train) = 0.0037 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0036 ;  MSE(train) = 0.0036 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0035 ;  MSE(train) = 0.0035 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0034 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0941 ;  MSE(train) = 0.0941 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0776 ;  MSE(train) = 0.0776 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0827 ;  MSE(train) = 0.0827 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0045 ;  MSE(train) = 0.0045 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0043 ;  MSE(train) = 0.0043 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0042 ;  MSE(train) = 0.0042 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0041 ;  MSE(train) = 0.0041 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0040 ;  MSE(train) = 0.0040 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0039 ;  MSE(train) = 0.0039 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0038 ;  MSE(train) = 0.0038 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0037 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0953 ;  MSE(train) = 0.0953 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0773 ;  MSE(train) = 0.0773 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0827 ;  MSE(train) = 0.0827 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0042 ;  MSE(train) = 0.0042 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0040 ;  MSE(train) = 0.0040 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0039 ;  MSE(train) = 0.0039 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0038 ;  MSE(train) = 0.0038 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0037 ;  MSE(train) = 0.0037 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0036 ;  MSE(train) = 0.0036 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0035 ;  MSE(train) = 0.0035 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0034 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0879 ;  MSE(train) = 0.0879 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0665 ;  MSE(train) = 0.0665 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0784 ;  MSE(train) = 0.0784 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0045 ;  MSE(train) = 0.0045 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0042 ;  MSE(train) = 0.0042 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0040 ;  MSE(train) = 0.0040 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0039 ;  MSE(train) = 0.0039 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0037 ;  MSE(train) = 0.0037 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0037 ;  MSE(train) = 0.0037 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0036 ;  MSE(train) = 0.0036 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0035 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0964 ;  MSE(train) = 0.0964 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0757 ;  MSE(train) = 0.0757 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0812 ;  MSE(train) = 0.0812 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0048 ;  MSE(train) = 0.0048 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0046 ;  MSE(train) = 0.0046 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0044 ;  MSE(train) = 0.0044 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0043 ;  MSE(train) = 0.0043 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0042 ;  MSE(train) = 0.0042 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0041 ;  MSE(train) = 0.0041 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0040 ;  MSE(train) = 0.0040 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0039 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0759 ;  MSE(train) = 0.0759 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0630 ;  MSE(train) = 0.0630 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0717 ;  MSE(train) = 0.0717 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0059 ;  MSE(train) = 0.0059 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0050 ;  MSE(train) = 0.0050 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0046 ;  MSE(train) = 0.0046 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0043 ;  MSE(train) = 0.0043 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0041 ;  MSE(train) = 0.0041 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0040 ;  MSE(train) = 0.0040 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0038 ;  MSE(train) = 0.0038 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0037 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0689 ;  MSE(train) = 0.0689 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0595 ;  MSE(train) = 0.0595 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0694 ;  MSE(train) = 0.0694 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0050 ;  MSE(train) = 0.0050 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0042 ;  MSE(train) = 0.0042 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0039 ;  MSE(train) = 0.0039 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0036 ;  MSE(train) = 0.0036 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0035 ;  MSE(train) = 0.0035 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0033 ;  MSE(train) = 0.0033 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0032 ;  MSE(train) = 0.0032 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0031 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0719 ;  MSE(train) = 0.0719 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0597 ;  MSE(train) = 0.0597 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0673 ;  MSE(train) = 0.0673 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0053 ;  MSE(train) = 0.0053 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0048 ;  MSE(train) = 0.0048 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0045 ;  MSE(train) = 0.0045 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0043 ;  MSE(train) = 0.0043 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0041 ;  MSE(train) = 0.0041 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0040 ;  MSE(train) = 0.0040 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0038 ;  MSE(train) = 0.0038 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0037 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0694 ;  MSE(train) = 0.0694 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0534 ;  MSE(train) = 0.0534 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0651 ;  MSE(train) = 0.0651 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0059 ;  MSE(train) = 0.0059 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0051 ;  MSE(train) = 0.0051 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0046 ;  MSE(train) = 0.0046 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0044 ;  MSE(train) = 0.0044 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0041 ;  MSE(train) = 0.0041 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0039 ;  MSE(train) = 0.0039 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0038 ;  MSE(train) = 0.0038 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0037 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0729 ;  MSE(train) = 0.0729 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0718 ;  MSE(train) = 0.0718 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0677 ;  MSE(train) = 0.0677 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0046 ;  MSE(train) = 0.0046 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0043 ;  MSE(train) = 0.0043 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0040 ;  MSE(train) = 0.0040 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0039 ;  MSE(train) = 0.0039 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0037 ;  MSE(train) = 0.0037 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0036 ;  MSE(train) = 0.0036 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0035 ;  MSE(train) = 0.0035 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0034 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0691 ;  MSE(train) = 0.0691 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0482 ;  MSE(train) = 0.0482 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0619 ;  MSE(train) = 0.0619 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0065 ;  MSE(train) = 0.0065 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0056 ;  MSE(train) = 0.0056 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0051 ;  MSE(train) = 0.0051 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0048 ;  MSE(train) = 0.0048 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0046 ;  MSE(train) = 0.0046 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0044 ;  MSE(train) = 0.0044 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0042 ;  MSE(train) = 0.0042 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0040 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0714 ;  MSE(train) = 0.0714 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0492 ;  MSE(train) = 0.0492 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0644 ;  MSE(train) = 0.0644 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0071 ;  MSE(train) = 0.0071 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0057 ;  MSE(train) = 0.0057 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0052 ;  MSE(train) = 0.0052 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0049 ;  MSE(train) = 0.0049 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0046 ;  MSE(train) = 0.0046 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0044 ;  MSE(train) = 0.0044 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0042 ;  MSE(train) = 0.0042 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0041 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0692 ;  MSE(train) = 0.0692 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0509 ;  MSE(train) = 0.0509 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0671 ;  MSE(train) = 0.0671 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0064 ;  MSE(train) = 0.0064 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0050 ;  MSE(train) = 0.0050 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0046 ;  MSE(train) = 0.0046 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0043 ;  MSE(train) = 0.0043 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0040 ;  MSE(train) = 0.0040 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0039 ;  MSE(train) = 0.0039 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0038 ;  MSE(train) = 0.0038 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0036 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0682 ;  MSE(train) = 0.0682 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0556 ;  MSE(train) = 0.0556 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0675 ;  MSE(train) = 0.0675 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0049 ;  MSE(train) = 0.0049 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0043 ;  MSE(train) = 0.0043 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0039 ;  MSE(train) = 0.0039 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0037 ;  MSE(train) = 0.0037 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0035 ;  MSE(train) = 0.0035 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0034 ;  MSE(train) = 0.0034 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0033 ;  MSE(train) = 0.0033 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0032 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0739 ;  MSE(train) = 0.0739 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0521 ;  MSE(train) = 0.0521 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0631 ;  MSE(train) = 0.0631 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0073 ;  MSE(train) = 0.0073 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0060 ;  MSE(train) = 0.0060 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0054 ;  MSE(train) = 0.0054 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0050 ;  MSE(train) = 0.0050 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0047 ;  MSE(train) = 0.0047 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0045 ;  MSE(train) = 0.0045 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0043 ;  MSE(train) = 0.0043 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0041 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0525 ;  MSE(train) = 0.0525 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0337 ;  MSE(train) = 0.0337 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0462 ;  MSE(train) = 0.0462 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0097 ;  MSE(train) = 0.0097 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0064 ;  MSE(train) = 0.0064 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0057 ;  MSE(train) = 0.0057 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0052 ;  MSE(train) = 0.0052 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0047 ;  MSE(train) = 0.0047 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0044 ;  MSE(train) = 0.0044 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0041 ;  MSE(train) = 0.0041 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0039 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0526 ;  MSE(train) = 0.0526 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0414 ;  MSE(train) = 0.0414 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0559 ;  MSE(train) = 0.0559 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0080 ;  MSE(train) = 0.0080 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0042 ;  MSE(train) = 0.0042 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0037 ;  MSE(train) = 0.0037 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0034 ;  MSE(train) = 0.0034 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0032 ;  MSE(train) = 0.0032 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0031 ;  MSE(train) = 0.0031 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0030 ;  MSE(train) = 0.0030 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0029 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0520 ;  MSE(train) = 0.0520 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0371 ;  MSE(train) = 0.0371 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0502 ;  MSE(train) = 0.0502 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0085 ;  MSE(train) = 0.0085 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0049 ;  MSE(train) = 0.0049 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0045 ;  MSE(train) = 0.0045 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0042 ;  MSE(train) = 0.0042 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0039 ;  MSE(train) = 0.0039 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0037 ;  MSE(train) = 0.0037 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0035 ;  MSE(train) = 0.0035 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0034 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0556 ;  MSE(train) = 0.0556 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0415 ;  MSE(train) = 0.0415 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0483 ;  MSE(train) = 0.0483 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0090 ;  MSE(train) = 0.0090 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0063 ;  MSE(train) = 0.0063 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0057 ;  MSE(train) = 0.0057 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0052 ;  MSE(train) = 0.0052 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0049 ;  MSE(train) = 0.0049 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0046 ;  MSE(train) = 0.0046 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0043 ;  MSE(train) = 0.0043 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0041 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0522 ;  MSE(train) = 0.0522 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0432 ;  MSE(train) = 0.0432 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0486 ;  MSE(train) = 0.0486 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0076 ;  MSE(train) = 0.0076 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0054 ;  MSE(train) = 0.0054 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0049 ;  MSE(train) = 0.0049 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0046 ;  MSE(train) = 0.0046 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0043 ;  MSE(train) = 0.0043 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0041 ;  MSE(train) = 0.0041 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0039 ;  MSE(train) = 0.0039 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0038 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0544 ;  MSE(train) = 0.0544 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0388 ;  MSE(train) = 0.0388 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0482 ;  MSE(train) = 0.0482 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0079 ;  MSE(train) = 0.0079 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0055 ;  MSE(train) = 0.0055 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0050 ;  MSE(train) = 0.0050 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0046 ;  MSE(train) = 0.0046 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0043 ;  MSE(train) = 0.0043 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0041 ;  MSE(train) = 0.0041 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0039 ;  MSE(train) = 0.0039 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0037 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0482 ;  MSE(train) = 0.0482 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0294 ;  MSE(train) = 0.0294 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0446 ;  MSE(train) = 0.0446 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0095 ;  MSE(train) = 0.0095 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0064 ;  MSE(train) = 0.0064 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0057 ;  MSE(train) = 0.0057 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0051 ;  MSE(train) = 0.0051 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0047 ;  MSE(train) = 0.0047 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0044 ;  MSE(train) = 0.0044 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0041 ;  MSE(train) = 0.0041 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0038 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0514 ;  MSE(train) = 0.0514 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0414 ;  MSE(train) = 0.0414 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0533 ;  MSE(train) = 0.0533 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0091 ;  MSE(train) = 0.0091 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0045 ;  MSE(train) = 0.0045 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0041 ;  MSE(train) = 0.0041 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0038 ;  MSE(train) = 0.0038 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0036 ;  MSE(train) = 0.0036 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0034 ;  MSE(train) = 0.0034 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0033 ;  MSE(train) = 0.0033 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0031 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0484 ;  MSE(train) = 0.0484 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0449 ;  MSE(train) = 0.0449 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0517 ;  MSE(train) = 0.0517 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0056 ;  MSE(train) = 0.0056 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0038 ;  MSE(train) = 0.0038 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0034 ;  MSE(train) = 0.0034 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0032 ;  MSE(train) = 0.0032 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0030 ;  MSE(train) = 0.0030 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0029 ;  MSE(train) = 0.0029 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0028 ;  MSE(train) = 0.0028 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0027 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0547 ;  MSE(train) = 0.0547 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0392 ;  MSE(train) = 0.0392 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0499 ;  MSE(train) = 0.0499 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0089 ;  MSE(train) = 0.0089 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0062 ;  MSE(train) = 0.0062 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0056 ;  MSE(train) = 0.0056 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0051 ;  MSE(train) = 0.0051 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0048 ;  MSE(train) = 0.0048 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0045 ;  MSE(train) = 0.0045 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0042 ;  MSE(train) = 0.0042 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0040 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0406 ;  MSE(train) = 0.0406 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0254 ;  MSE(train) = 0.0254 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0333 ;  MSE(train) = 0.0333 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0122 ;  MSE(train) = 0.0122 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0081 ;  MSE(train) = 0.0081 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0064 ;  MSE(train) = 0.0064 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0055 ;  MSE(train) = 0.0055 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0049 ;  MSE(train) = 0.0049 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0045 ;  MSE(train) = 0.0045 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0041 ;  MSE(train) = 0.0041 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0038 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0401 ;  MSE(train) = 0.0401 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0267 ;  MSE(train) = 0.0267 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0332 ;  MSE(train) = 0.0332 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0112 ;  MSE(train) = 0.0112 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0082 ;  MSE(train) = 0.0082 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0068 ;  MSE(train) = 0.0068 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0059 ;  MSE(train) = 0.0059 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0053 ;  MSE(train) = 0.0053 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0048 ;  MSE(train) = 0.0048 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0044 ;  MSE(train) = 0.0044 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0041 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0409 ;  MSE(train) = 0.0409 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0275 ;  MSE(train) = 0.0275 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0346 ;  MSE(train) = 0.0346 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0119 ;  MSE(train) = 0.0119 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0078 ;  MSE(train) = 0.0078 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0063 ;  MSE(train) = 0.0063 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0056 ;  MSE(train) = 0.0056 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0050 ;  MSE(train) = 0.0050 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0046 ;  MSE(train) = 0.0046 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0042 ;  MSE(train) = 0.0042 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0040 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0386 ;  MSE(train) = 0.0386 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0295 ;  MSE(train) = 0.0295 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0351 ;  MSE(train) = 0.0351 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0085 ;  MSE(train) = 0.0085 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0054 ;  MSE(train) = 0.0054 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0048 ;  MSE(train) = 0.0048 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0044 ;  MSE(train) = 0.0044 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0040 ;  MSE(train) = 0.0040 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0037 ;  MSE(train) = 0.0037 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0035 ;  MSE(train) = 0.0035 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0033 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0410 ;  MSE(train) = 0.0410 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0306 ;  MSE(train) = 0.0306 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0342 ;  MSE(train) = 0.0342 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0093 ;  MSE(train) = 0.0093 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0064 ;  MSE(train) = 0.0064 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0056 ;  MSE(train) = 0.0056 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0050 ;  MSE(train) = 0.0050 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0045 ;  MSE(train) = 0.0045 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0041 ;  MSE(train) = 0.0041 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0038 ;  MSE(train) = 0.0038 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0036 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0367 ;  MSE(train) = 0.0367 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0236 ;  MSE(train) = 0.0236 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0328 ;  MSE(train) = 0.0328 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0119 ;  MSE(train) = 0.0119 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0066 ;  MSE(train) = 0.0066 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0052 ;  MSE(train) = 0.0052 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0046 ;  MSE(train) = 0.0046 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0041 ;  MSE(train) = 0.0041 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0038 ;  MSE(train) = 0.0038 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0036 ;  MSE(train) = 0.0036 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0033 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0387 ;  MSE(train) = 0.0387 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0250 ;  MSE(train) = 0.0250 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0345 ;  MSE(train) = 0.0345 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0131 ;  MSE(train) = 0.0131 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0057 ;  MSE(train) = 0.0057 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0047 ;  MSE(train) = 0.0047 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0041 ;  MSE(train) = 0.0041 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0038 ;  MSE(train) = 0.0038 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0035 ;  MSE(train) = 0.0035 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0033 ;  MSE(train) = 0.0033 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0031 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0424 ;  MSE(train) = 0.0424 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0289 ;  MSE(train) = 0.0289 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0352 ;  MSE(train) = 0.0352 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0100 ;  MSE(train) = 0.0100 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0067 ;  MSE(train) = 0.0067 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0058 ;  MSE(train) = 0.0058 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0052 ;  MSE(train) = 0.0052 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0048 ;  MSE(train) = 0.0048 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0044 ;  MSE(train) = 0.0044 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0041 ;  MSE(train) = 0.0041 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0039 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0389 ;  MSE(train) = 0.0389 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0264 ;  MSE(train) = 0.0264 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0329 ;  MSE(train) = 0.0329 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0097 ;  MSE(train) = 0.0097 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0065 ;  MSE(train) = 0.0065 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0056 ;  MSE(train) = 0.0056 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0050 ;  MSE(train) = 0.0050 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0045 ;  MSE(train) = 0.0045 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0041 ;  MSE(train) = 0.0041 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0038 ;  MSE(train) = 0.0038 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0036 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0423 ;  MSE(train) = 0.0423 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0277 ;  MSE(train) = 0.0277 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0354 ;  MSE(train) = 0.0354 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0108 ;  MSE(train) = 0.0108 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0068 ;  MSE(train) = 0.0068 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0058 ;  MSE(train) = 0.0058 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0051 ;  MSE(train) = 0.0051 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0046 ;  MSE(train) = 0.0046 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0042 ;  MSE(train) = 0.0042 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0039 ;  MSE(train) = 0.0039 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0037 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0287 ;  MSE(train) = 0.0287 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0184 ;  MSE(train) = 0.0184 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0255 ;  MSE(train) = 0.0255 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0182 ;  MSE(train) = 0.0182 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0093 ;  MSE(train) = 0.0093 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0045 ;  MSE(train) = 0.0045 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0037 ;  MSE(train) = 0.0037 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0033 ;  MSE(train) = 0.0033 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0030 ;  MSE(train) = 0.0030 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0028 ;  MSE(train) = 0.0028 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0026 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0314 ;  MSE(train) = 0.0314 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0202 ;  MSE(train) = 0.0202 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0246 ;  MSE(train) = 0.0246 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0143 ;  MSE(train) = 0.0143 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0101 ;  MSE(train) = 0.0101 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0052 ;  MSE(train) = 0.0052 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0044 ;  MSE(train) = 0.0044 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0039 ;  MSE(train) = 0.0039 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0035 ;  MSE(train) = 0.0035 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0033 ;  MSE(train) = 0.0033 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0030 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0295 ;  MSE(train) = 0.0295 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0183 ;  MSE(train) = 0.0183 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0225 ;  MSE(train) = 0.0225 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0107 ;  MSE(train) = 0.0107 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0090 ;  MSE(train) = 0.0090 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0060 ;  MSE(train) = 0.0060 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0050 ;  MSE(train) = 0.0050 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0044 ;  MSE(train) = 0.0044 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0040 ;  MSE(train) = 0.0040 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0036 ;  MSE(train) = 0.0036 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0033 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0284 ;  MSE(train) = 0.0284 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0171 ;  MSE(train) = 0.0171 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0228 ;  MSE(train) = 0.0228 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0098 ;  MSE(train) = 0.0098 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0081 ;  MSE(train) = 0.0081 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0060 ;  MSE(train) = 0.0060 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0050 ;  MSE(train) = 0.0050 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0044 ;  MSE(train) = 0.0044 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0039 ;  MSE(train) = 0.0039 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0035 ;  MSE(train) = 0.0035 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0033 ;  MSE(t

  0%|          | 0/60 [00:00<?, ?it/s]

epoch 1/60   |   iter. 1/1 : 
MSE(batch) = 0.0298 ;  MSE(train) = 0.0298 ;  MSE(test) = [nan]
epoch 2/60   |   iter. 1/1 : 
MSE(batch) = 0.0183 ;  MSE(train) = 0.0183 ;  MSE(test) = [nan]
epoch 3/60   |   iter. 1/1 : 
MSE(batch) = 0.0232 ;  MSE(train) = 0.0232 ;  MSE(test) = [nan]
epoch 4/60   |   iter. 1/1 : 
MSE(batch) = 0.0096 ;  MSE(train) = 0.0096 ;  MSE(test) = [nan]
epoch 5/60   |   iter. 1/1 : 
MSE(batch) = 0.0078 ;  MSE(train) = 0.0078 ;  MSE(test) = [nan]
epoch 6/60   |   iter. 1/1 : 
MSE(batch) = 0.0061 ;  MSE(train) = 0.0061 ;  MSE(test) = [nan]
epoch 7/60   |   iter. 1/1 : 
MSE(batch) = 0.0051 ;  MSE(train) = 0.0051 ;  MSE(test) = [nan]
epoch 8/60   |   iter. 1/1 : 
MSE(batch) = 0.0045 ;  MSE(train) = 0.0045 ;  MSE(test) = [nan]
epoch 9/60   |   iter. 1/1 : 
MSE(batch) = 0.0040 ;  MSE(train) = 0.0040 ;  MSE(test) = [nan]
epoch 10/60   |   iter. 1/1 : 
MSE(batch) = 0.0037 ;  MSE(train) = 0.0037 ;  MSE(test) = [nan]
epoch 11/60   |   iter. 1/1 : 
MSE(batch) = 0.0034 ;  MSE(t

KeyboardInterrupt: 

In [11]:
# Store
# Write the optimization outputs and the settings used to obtain them into a
# single .npz archive in `output_folder`; each archive key is the name of the
# variable it holds.
# NOTE(review): the output of the previous cell ends with a KeyboardInterrupt,
# so the arrays saved here may come from an incomplete run -- confirm before
# relying on this file.
np.savez(
    output_folder + 'optim_lorenz_catsize.npz',
    **{
        'transform_optim': transform_optim,
        'mse_optim': mse_optim,
        'k': k,
        'learning_rate_factor': learning_rate_factor,
        'regul': regul,
        'n_iter': n_iter,
    },
)