In [1]:
import torch
import torch.nn as nn
from params import par
from torch.autograd import Variable
from torch.nn.init import kaiming_normal_, orthogonal_
import numpy as np
from CfC.torch_cfc import Cfc
import random

# Seed all three random number generators used in this file (PyTorch, Python's
# `random`, NumPy) with the same value.
SEED = 0

torch.manual_seed(SEED)
random.seed(SEED)
np.random.seed(SEED)


# Hyper-parameter dictionary passed to the CfC recurrent core as `hparams=CFC`
# in DeepVO.__init__.  Apart from "tau", which DeepVO.forward reads directly to
# scale the elapsed-time input, the keys are presumably interpreted by
# CfC.torch_cfc.Cfc -- TODO confirm against that module.
# NOTE(review): DeepVO passes `par.rnn_hidden_size` as the hidden size and the
# training cell uses `par.batch_size` / `par.epochs`, so "hidden_size",
# "batch_size" and "epochs" below may not be the values actually in effect.
CFC = {
    "epochs": 100,
    "clipnorm": 0,
    "hidden_size": 448,
    "base_lr": 0.002,
    "decay_lr": 0.97,
    "backbone_activation": "silu",
    "backbone_units": 64,
    "backbone_layers": 1,
    "backbone_dr": 0.0,
    "weight_decay": 0.0001,
    "tau": 10,  # multiplier applied to per-step elapsed time in DeepVO.forward
    "batch_size": 8,
    "optim": "adamw",
    "init": 0.84,
    "use_mixed": False,
}

def conv(batchNorm, in_planes, out_planes, kernel_size=3, stride=1, dropout=0):
    """Build one encoder stage: Conv2d -> [BatchNorm2d] -> LeakyReLU(0.1) -> Dropout.

    Args:
        batchNorm: when truthy, insert a BatchNorm2d after the convolution and
            create the convolution without a bias term.
        in_planes: number of input channels.
        out_planes: number of output channels.
        kernel_size: square kernel size; padding is ``(kernel_size - 1) // 2``.
        stride: convolution stride.
        dropout: drop probability of the trailing Dropout layer.

    Returns:
        An ``nn.Sequential`` holding the layers in the order listed above.
    """
    stage = [
        nn.Conv2d(
            in_planes,
            out_planes,
            kernel_size=kernel_size,
            stride=stride,
            padding=(kernel_size - 1) // 2,
            # The bias is only needed when no BatchNorm follows.
            bias=not batchNorm,
        )
    ]
    if batchNorm:
        stage.append(nn.BatchNorm2d(out_planes))
    stage.append(nn.LeakyReLU(0.1, inplace=True))
    stage.append(nn.Dropout(dropout))
    return nn.Sequential(*stage)

class DeepVO(nn.Module):
    """Convolutional image-pair encoder followed by a CfC recurrent core.

    The encoder consumes two stacked frames (2 input channels) and the
    recurrent core maps the flattened encoder output of every step to a
    6-value pose (the loss treats columns 0:3 as angles and 3:6 as
    translation).

    Args:
        imsize1: first spatial size of the input images, used only to probe
            the encoder output size.
        imsize2: second spatial size of the input images.
        batchNorm: whether every conv stage includes a BatchNorm2d.
    """

    def __init__(self, imsize1, imsize2, batchNorm=True):
        super().__init__()
        # CNN
        self.batchNorm = batchNorm
        self.clip = par.clip
        self.conv1   = conv(self.batchNorm,   2,   64, kernel_size=7, stride=2, dropout=par.conv_dropout[0])
        self.conv2   = conv(self.batchNorm,  64,  128, kernel_size=5, stride=2, dropout=par.conv_dropout[1])
        self.conv3   = conv(self.batchNorm, 128,  256, kernel_size=5, stride=2, dropout=par.conv_dropout[2])
        self.conv3_1 = conv(self.batchNorm, 256,  256, kernel_size=3, stride=1, dropout=par.conv_dropout[3])
        self.conv4   = conv(self.batchNorm, 256,  512, kernel_size=3, stride=2, dropout=par.conv_dropout[4])
        self.conv4_1 = conv(self.batchNorm, 512,  512, kernel_size=3, stride=1, dropout=par.conv_dropout[5])
        self.conv5   = conv(self.batchNorm, 512,  512, kernel_size=3, stride=2, dropout=par.conv_dropout[6])
        self.conv5_1 = conv(self.batchNorm, 512,  512, kernel_size=3, stride=1, dropout=par.conv_dropout[7])
        self.conv6   = conv(self.batchNorm, 512, 1024, kernel_size=3, stride=2, dropout=par.conv_dropout[8])

        # Compute the flattened encoder output size for this image size by
        # pushing one all-zero frame pair through the CNN.  No gradient graph
        # is needed for a shape probe.
        with torch.no_grad():
            probe = self.encode_image(torch.zeros(1, 2, imsize1, imsize2))

        # RNN
        # The original two-layer LSTM was replaced by a CfC core that emits
        # the 6-value pose directly (no separate dropout / linear head):
        # self.rnn = nn.LSTM(
        #             input_size=int(np.prod(probe.size())),
        #             hidden_size=par.rnn_hidden_size,
        #             num_layers=2,
        #             dropout=par.rnn_dropout_between,
        #             batch_first=True)
        self.rnn = Cfc(in_features=int(np.prod(probe.size())),
                       hidden_size=par.rnn_hidden_size,
                       out_feature=6,
                       return_sequences=True,
                       hparams=CFC)

        # self.rnn_drop_out = nn.Dropout(par.rnn_dropout_out)
        # self.linear = nn.Linear(in_features=par.rnn_hidden_size, out_features=6)

        # Initialization.  self.modules() also walks the sub-modules of
        # self.rnn, so any Conv/Linear/LSTM/BatchNorm2d layers found inside it
        # are re-initialized here as well.
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d, nn.Linear)):
                kaiming_normal_(m.weight.data)
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.LSTM):
                # layer 1
                kaiming_normal_(m.weight_ih_l0)  #orthogonal_(m.weight_ih_l0)
                kaiming_normal_(m.weight_hh_l0)
                m.bias_ih_l0.data.zero_()
                m.bias_hh_l0.data.zero_()
                # Set forget gate bias to 1 (remember)
                n = m.bias_hh_l0.size(0)
                start, end = n//4, n//2
                m.bias_hh_l0.data[start:end].fill_(1.)

                # layer 2
                kaiming_normal_(m.weight_ih_l1)  #orthogonal_(m.weight_ih_l1)
                kaiming_normal_(m.weight_hh_l1)
                m.bias_ih_l1.data.zero_()
                m.bias_hh_l1.data.zero_()
                n = m.bias_hh_l1.size(0)
                start, end = n//4, n//2
                m.bias_hh_l1.data[start:end].fill_(1.)

            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def forward(self, x, times):
        """Predict one pose per consecutive frame pair.

        Args:
            x: image sequence, (batch, seq_len, channel, width, height)
                according to the original author's comment.
            times: per-frame timestamps; assumed to be (batch, n_steps) --
                TODO confirm against the dataset.

        Returns:
            Whatever ``self.rnn`` returns for the encoded sequence
            (constructed with ``out_feature=6`` and ``return_sequences=True``).
        """
        # Stack each frame with its successor along the channel axis; the
        # sequence dimension shrinks by one.
        x = torch.cat((x[:, :-1], x[:, 1:]), dim=2)
        batch_size = x.size(0)
        seq_len = x.size(1)
        # CNN: fold the sequence into the batch dimension, encode, unfold and
        # flatten the features of every step.
        x = x.view(batch_size*seq_len, x.size(2), x.size(3), x.size(4))
        x = self.encode_image(x)
        x = x.view(batch_size, seq_len, -1)

        # Elapsed time between consecutive timestamps, with a zero prepended
        # for the first step, scaled by the configured tau.
        t_elapsed = times[:, 1:] - times[:, :-1]
        t_fill = torch.zeros(times.size(0), 1, device=x.device)
        t = torch.cat((t_fill, t_elapsed), dim=1)

        t = t * CFC["tau"]

        # RNN
        out = self.rnn(x, t)
        return out

    def encode_image(self, x):
        """Run the convolutional stack conv1 .. conv6 and return its output."""
        out_conv2 = self.conv2(self.conv1(x))
        out_conv3 = self.conv3_1(self.conv3(out_conv2))
        out_conv4 = self.conv4_1(self.conv4(out_conv3))
        out_conv5 = self.conv5_1(self.conv5(out_conv4))
        out_conv6 = self.conv6(out_conv5)
        return out_conv6

    def weight_parameters(self):
        """Return all parameters whose qualified name contains 'weight'."""
        return [param for name, param in self.named_parameters() if 'weight' in name]

    def bias_parameters(self):
        """Return all parameters whose qualified name contains 'bias'."""
        return [param for name, param in self.named_parameters() if 'bias' in name]

    def get_loss(self, x, times, y):
        """Weighted MSE between predicted and ground-truth poses.

        The first ground-truth step is dropped so targets line up with the
        frame-pair predictions.  Columns 0:3 (angles) are weighted 100x
        relative to columns 3: (translation).
        """
        predicted = self.forward(x, times)
        y = y[:, 1:, :]  # (batch, seq, dim_pose)
        # Weighted MSE Loss
        angle_loss = torch.nn.functional.mse_loss(predicted[:,:,:3], y[:,:,:3])
        translation_loss = torch.nn.functional.mse_loss(predicted[:,:,3:], y[:,:,3:])
        loss = (100 * angle_loss + translation_loss)
        return loss

    def step(self, x, times, y, optimizer):
        """Run one optimization step and return the loss tensor.

        When ``self.clip`` is not None, the gradient norm of the recurrent
        core's parameters is clipped to that value before the update.
        """
        optimizer.zero_grad()
        loss = self.get_loss(x, times, y)
        loss.backward()
        if self.clip is not None:
            # clip_grad_norm_ is the in-place successor of the deprecated
            # clip_grad_norm.
            torch.nn.utils.clip_grad_norm_(self.rnn.parameters(), self.clip)
        optimizer.step()
        return loss


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# %load Dataloader_loss.py

from params import par
import cv2
import math

import numpy as np
import time
import torch

import os
from torch.autograd import Variable
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.nn.modules import loss
from torch import functional as F


###############################################################
#Transform Rotation Matrix to Euler Angle
###############################################################
# Checks if a matrix is a valid rotation matrix.
def isRotationMatrix(R) :
    """Return True when ``R^T R`` is within 1e-6 (matrix norm) of the 3x3 identity.

    Only orthonormality is tested; the sign of the determinant is not checked.
    """
    gram = np.dot(np.transpose(R), R)
    deviation = np.linalg.norm(np.identity(3, dtype = R.dtype) - gram)
    return deviation < 1e-6
 
 
# Calculates rotation matrix to euler angles
# The result is the same as MATLAB except the order
# of the euler angles ( x and z are swapped ).
def rotationMatrixToEulerAngles(R) :
    """Convert a 3x3 rotation matrix into the angle triple ``[x, y, z]``.

    Asserts that ``R`` passes ``isRotationMatrix``.  When
    ``sqrt(R[0,0]^2 + R[1,0]^2)`` falls below 1e-6 the matrix is treated as
    singular: ``z`` is fixed to 0 and ``x`` is taken from the second row.

    Returns:
        ``np.array([x, y, z])``.
    """
    assert(isRotationMatrix(R))

    sy = math.sqrt(R[0,0] * R[0,0] +  R[1,0] * R[1,0])

    # y has the same expression in both the regular and the singular case.
    angle_y = math.atan2(-R[2,0], sy)

    if sy < 1e-6 :
        angle_x = math.atan2(-R[1,2], R[1,1])
        angle_z = 0
    else :
        angle_x = math.atan2(R[2,1] , R[2,2])
        angle_z = math.atan2(R[1,0], R[0,0])

    return np.array([angle_x, angle_y, angle_z])

###############################################################
#DataLoader
###############################################################

#only fixed seq_len is used
class KITTI_Data(Dataset):
    """Fixed-length windows of consecutive frames with their ground-truth poses.

    Only image paths are kept in memory; frames are read from disk on access.

    Args:
        folder: sequence identifier substituted into the image and pose paths.
        seq_len: number of stacked frame pairs (and pose rows) per sample.
    """

    def __init__(self,folder,seq_len):
        # Only the image paths are stored; sorting makes the order follow the
        # file names.
        root_train = 'dataset/dataset/sequences/{}/image_0'.format(folder)
        self.imgs = sorted(os.path.join(root_train, img) for img in os.listdir(root_train))
        self.GT = readGT('dataset/dataset/poses/{}.txt'.format(folder))

        self.seq_len = seq_len

    def __getitem__(self, index):
        """Return ``(pairs, poses)`` for the window starting at ``index``.

        ``pairs`` stacks ``seq_len`` arrays, each the channel-wise
        concatenation of two consecutive frames (channels first, channel
        order reversed from what ``cv2.imread`` delivers).  ``poses`` is
        ``self.GT[index:index + seq_len]``.

        Raises:
            IndexError: if the window runs past the ground-truth poses.
            IOError: if a frame cannot be read from disk.
        """
        # The valid range is dictated by the ground-truth file: a window needs
        # seq_len + 1 frames.  Fail loudly instead of printing and carrying on.
        try:
            self.GT[index + self.seq_len]
        except IndexError as err:
            raise IndexError(
                'Index {} out of range for seq_len {}'.format(index, self.seq_len)
            ) from err

        frames = []
        for offset in range(self.seq_len + 1):
            path = self.imgs[index + offset]
            raw = cv2.imread(path)
            if raw is None:
                # cv2.imread signals failure by returning None, not by raising.
                raise IOError('Could not read image: {}'.format(path))
            # Reverse the channel order, then move channels to the first axis.
            img = np.asarray(raw, dtype=np.float32)[:, :, (2, 1, 0)]
            frames.append(np.transpose(img, (2, 0, 1)))

        # Stack every frame with its successor along the channel axis.
        pairs = [np.concatenate((frames[k], frames[k + 1]), axis=0) for k in range(len(frames) - 1)]

        return np.stack(pairs, axis=0), self.GT[index:index + self.seq_len, :]

    def __len__(self):
        # Uses the instance's own window length rather than the global
        # par.seq_len, so it always agrees with __getitem__.
        return self.GT.shape[0]-1-self.seq_len-1

#read groundtruth and return np.array
def readGT(root):
    """Read a pose file and return one ``[x, y, z angles, tx, ty, tz]`` row per line.

    Every non-blank line is parsed as 12 whitespace-separated floats, read as
    a row-major 3x4 matrix: columns 0-2 form the rotation (converted with
    ``rotationMatrixToEulerAngles``) and column 3 the translation.

    Args:
        root: path of the pose text file.

    Returns:
        ``np.ndarray`` of dtype float32 with one row of 6 values per pose.
    """
    GT = []
    with open(root, 'r') as posefile:
        for one_line in posefile:
            # split() without an argument tolerates trailing newlines,
            # repeated spaces and blank lines.
            values = [float(pose) for pose in one_line.split()]
            if not values:
                continue
            rotation = np.array([values[0:3], values[4:7], values[8:11]])
            translation = np.array([values[3], values[7], values[11]])
            GT.append(np.append(rotationMatrixToEulerAngles(rotation), translation))
    return np.array(GT, dtype=np.float32)
        
###############################################################
#Custom Loss Function
###############################################################

class DeepvoLoss(loss._Loss):
    """MSE on elements ``[0:3]`` plus 100x MSE on elements ``[3:6]``.

    Args:
        size_average: legacy flag; with ``reduce`` true, True averages the
            squared errors and False sums them.
        reduce: legacy flag; False returns the unreduced element-wise loss.

    NOTE(review): slicing is along the first dimension, so ``input`` and
    ``target`` are presumably single 6-element pose vectors -- confirm.
    NOTE(review): ``DeepVO.get_loss`` puts the 100x weight on columns 0:3
    instead; confirm which weighting is intended.
    """

    def __init__(self, size_average=True, reduce=True):
        # Translate the legacy flags into the `reduction` string that
        # mse_loss expects; the flags were previously ignored.
        if not reduce:
            reduction = 'none'
        elif size_average:
            reduction = 'mean'
        else:
            reduction = 'sum'
        super(DeepvoLoss, self).__init__(reduction=reduction)

    def forward(self, input, target):
        # The module-level `F` alias points at torch.functional, which has no
        # mse_loss; call torch.nn.functional explicitly.
        mse = torch.nn.functional.mse_loss
        first_half = mse(input[0:3], target[0:3], reduction=self.reduction)
        second_half = mse(input[3:6], target[3:6], reduction=self.reduction)
        return first_half + 100 * second_half


In [3]:
import os
import glob
import numpy as np
import time
from params import par
from torchvision import transforms
from PIL import Image
import torch
import math
from helper import R_to_angle

def clean_unused_images():
    """Delete frames outside each sequence's kept range and trim its timestamps.

    For every sequence directory that exists under ``par.image_dir``:
    image files whose index lies below the start or above the end of the kept
    range (up to index 9999) are removed, and the lines of ``times.txt`` whose
    line number falls inside the range are written to ``clean_times.txt``.

    WARNING: this permanently removes files from disk.
    NOTE(review): file names are built with a 10-digit zero-padded index;
    confirm that matches how the frames are named on disk.
    """
    seq_frame = {'00': ['000', '004540'],
                '01': ['000', '001100'],
                '02': ['000', '004660'],
                '03': ['000', '000800'],
                '04': ['000', '000270'],
                '05': ['000', '002760'],
                '06': ['000', '001100'],
                '07': ['000', '001100'],
                '08': ['001100', '005170'],
                '09': ['000', '001590'],
                '10': ['000', '001200']
                }
    for dir_id, (first_id, last_id) in seq_frame.items():
        dir_path = '{}{}/'.format(par.image_dir, dir_id)
        if not os.path.exists(dir_path):
            continue

        print('Cleaning {} directory'.format(dir_id))
        start, end = int(first_id), int(last_id)

        # Frames before the kept range first, then those after it.
        stale_ids = [*range(0, start), *range(end+1, 10000)]
        for idx in stale_ids:
            img_path = '{}{}/{}'.format(par.image_dir, dir_id, '{:010d}.png'.format(idx))
            if os.path.isfile(img_path):
                os.remove(img_path)

        # Keep only the timestamps that belong to the surviving frames.
        with open(f'{dir_path}/times.txt', 'r') as f:
            clean_times = [line for i, line in enumerate(f.readlines()) if start <= i <= end]
        with open(f'{dir_path}/clean_times.txt', 'w') as f:
            f.writelines(clean_times)
            


# transform poseGT [R|t] to [theta_x, theta_y, theta_z, x, y, z]
# save as .npy file
def create_pose_data():
    """Convert every listed pose ``.txt`` file under ``par.pose_dir`` to ``.npy``.

    Each line is split on single spaces into floats and handed to
    ``R_to_angle``; the resulting rows are saved next to the source file with
    the extension replaced by ``.npy``.  Progress and total elapsed time are
    printed.
    """
    # Only the keys (sequence ids) are used here; the values are not read.
    info = {'00': [0, 4540], '01': [0, 1100], '02': [0, 4660], '03': [0, 800], '04': [0, 270], '05': [0, 2760], '06': [0, 1100], '07': [0, 1100], '08': [1100, 5170], '09': [0, 1590], '10': [0, 1200]}
    start_t = time.time()
    for video in info:
        fn = '{}{}.txt'.format(par.pose_dir, video)
        print('Transforming {}...'.format(fn))
        with open(fn) as f:
            rows = [line.split('\n')[0] for line in f.readlines()]
        # One pose per line; each line holds the values handed to R_to_angle.
        poses = np.array([R_to_angle([float(value) for value in row.split(' ')]) for row in rows])
        np.save(os.path.splitext(fn)[0]+'.npy', poses)
        print('Video {}: shape={}'.format(video, poses.shape))
    print('elapsed time = {}'.format(time.time()-start_t))


def calculate_bw_mean_std(image_path_list, minus_point_5=False):
    """Print the pixel mean and standard deviation over a list of image files.

    Two passes are made over the files: the first accumulates the sums for the
    mean, the second the squared deviations.  Statistics are reported twice,
    once on the ``ToTensor`` output (optionally shifted by -0.5) and once on
    the raw array from ``np.array(image)``.  The pixel count is height x width
    of each image.

    Args:
        image_path_list: paths of the images to scan.
        minus_point_5: subtract 0.5 from the tensor version before
            accumulating (the raw array version is not shifted).

    Returns:
        None; results are printed.
    """
    n_images = len(image_path_list)
    print('Numbers of frames in training dataset: {}'.format(n_images))
    to_tensor = transforms.ToTensor()

    def load(path):
        # Return the image both as a (possibly shifted) tensor and as a raw array.
        pil_img = Image.open(path)
        as_tensor = to_tensor(pil_img)
        if minus_point_5:
            as_tensor = as_tensor - 0.5
        return as_tensor, np.array(pil_img)

    # Pass 1: means.
    cnt_pixels = 0
    sum_tensor = 0.0
    sum_np = 0.0
    for idx, img_path in enumerate(image_path_list):
        print('{} / {}'.format(idx, n_images), end='\r')
        as_tensor, as_np = load(img_path)
        cnt_pixels += as_np.shape[0]*as_np.shape[1]
        sum_tensor += float(torch.sum(as_tensor))
        sum_np += float(np.sum(as_np))

    mean_tensor = sum_tensor / cnt_pixels
    mean_np = sum_np / cnt_pixels
    print('mean_tensor = ', mean_tensor)
    print('mean_np = ', mean_np)

    # Pass 2: standard deviations around the means found above.
    sq_tensor = 0.0
    sq_np = 0.0
    for idx, img_path in enumerate(image_path_list):
        print('{} / {}'.format(idx, n_images), end='\r')
        as_tensor, as_np = load(img_path)
        sq_tensor += float(torch.sum((as_tensor - mean_tensor)**2))
        sq_np += float(np.sum((as_np - mean_np)**2))

    std_tensor = math.sqrt(sq_tensor / cnt_pixels)
    std_np = math.sqrt(sq_np / cnt_pixels)
    print('std_tensor = ', std_tensor)
    print('std_np = ', std_np)


# NOTE: this call runs every time the cell executes and deletes image files
# that fall outside the kept frame ranges.
clean_unused_images()
# Set to True to also regenerate the pose .npy files and print the image
# mean/std of the training sequences.
run = False
if run:
    create_pose_data()

    # Calculate Black & White means of images in training videos
    train_video = ['00', '02', '08', '09', '06', '04', '10']
    image_path_list = []
    for folder in train_video:
        image_path_list += glob.glob('dataset/dataset/sequences/{}/image_0/*.png'.format(folder))
    calculate_bw_mean_std(image_path_list, minus_point_5=True)



Cleaning 00 directory
Cleaning 01 directory
Cleaning 02 directory
Cleaning 03 directory
Cleaning 04 directory
Cleaning 05 directory
Cleaning 06 directory
Cleaning 07 directory
Cleaning 08 directory
Cleaning 09 directory
Cleaning 10 directory


In [None]:
import torch
from torch.utils.data import DataLoader
import numpy as np
import os
import time
import pandas as pd
from params import par
from data_helper import get_data_info, SortedRandomBatchSampler, ImageSequenceDataset, get_partition_data_info


# Dump every attribute of `par` to the record file, framed by separator lines.
# When resuming, the existing record is appended to instead of overwritten.
mode = 'a' if par.resume else 'w'
with open(par.record_path, mode) as f:
    banner = '\n'+'='*50 + '\n'
    hyperparams = '\n'.join("%s: %s" % item for item in vars(par).items())
    f.write(banner + hyperparams + banner)

# Prepare Data
# Reuse the cached sample tables when both exist; otherwise build them from the
# configured videos and cache them for the next run.
if os.path.isfile(par.train_data_info_path) and os.path.isfile(par.valid_data_info_path):
    print('Load data info from {}'.format(par.train_data_info_path))
    train_df = pd.read_pickle(par.train_data_info_path)
    valid_df = pd.read_pickle(par.valid_data_info_path)
else:
    print('Create new data info')
    if par.partition is not None:
        # Split the training videos themselves into train/validation parts.
        partition = par.partition
        train_df, valid_df = get_partition_data_info(partition, par.train_video, par.seq_len, overlap=1, sample_times=par.sample_times, shuffle=True, sort=True)
    else:
        # Use separate video lists for training and validation.
        train_df = get_data_info(folder_list=par.train_video, seq_len_range=par.seq_len, overlap=1, sample_times=par.sample_times)
        valid_df = get_data_info(folder_list=par.valid_video, seq_len_range=par.seq_len, overlap=1, sample_times=par.sample_times)
    # save the data info
    train_df.to_pickle(par.train_data_info_path)
    valid_df.to_pickle(par.valid_data_info_path)

# Both loaders use a batch sampler, so batching is decided by the sampler.
train_sampler = SortedRandomBatchSampler(train_df, par.batch_size, drop_last=True)
train_dataset = ImageSequenceDataset(train_df, par.resize_mode, (par.img_w, par.img_h), par.img_means, par.img_stds, par.minus_point_5)
train_dl = DataLoader(train_dataset, batch_sampler=train_sampler, num_workers=par.n_processors, pin_memory=par.pin_mem)

valid_sampler = SortedRandomBatchSampler(valid_df, par.batch_size, drop_last=True)
valid_dataset = ImageSequenceDataset(valid_df, par.resize_mode, (par.img_w, par.img_h), par.img_means, par.img_stds, par.minus_point_5)
valid_dl = DataLoader(valid_dataset, batch_sampler=valid_sampler, num_workers=par.n_processors, pin_memory=par.pin_mem)

print('Number of samples in training dataset: ', len(train_df.index))
print('Number of samples in validation dataset: ', len(valid_df.index))
print('='*50)


# Model
M_deepvo = DeepVO(par.img_h, par.img_w, par.batch_norm)
use_cuda = torch.cuda.is_available()
if use_cuda:
    print('CUDA used.')
    M_deepvo = M_deepvo.cuda()


# Load FlowNet weights pretrained with FlyingChairs
# NOTE: the pretrained model assumes image rgb values in range [-0.5, 0.5]
if par.pretrained_flownet and not par.resume:
    # Without CUDA the checkpoint must be remapped to the CPU.  Both branches
    # read the same `par.pretrained_flownet` path (the CPU branch previously
    # referenced a non-existent `par.pretrained_flownet_flownet`).
    if use_cuda:
        pretrained_w = torch.load(par.pretrained_flownet)
    else:
        pretrained_w = torch.load(par.pretrained_flownet, map_location='cpu')
    print('Load FlowNet pretrained model')
    # Use only conv-layer-part of FlowNet as CNN for DeepVO: copy just the
    # entries whose key also exists in this model.
    model_dict = M_deepvo.state_dict()
    update_dict = {k: v for k, v in pretrained_w['state_dict'].items() if k in model_dict}
    model_dict.update(update_dict)
    M_deepvo.load_state_dict(model_dict)


# Create optimizer
# `lr_scheduler` stays None unless the cosine schedule is selected.
lr_scheduler = None
if par.optim['opt'] == 'Adam':
    # NOTE(review): Adam uses a fixed lr of 0.001 and ignores par.optim['lr'];
    # confirm this is intentional.
    optimizer = torch.optim.Adam(M_deepvo.parameters(), lr=0.001, betas=(0.9, 0.999))
elif par.optim['opt'] == 'Adagrad':
    optimizer = torch.optim.Adagrad(M_deepvo.parameters(), lr=par.optim['lr'])
elif par.optim['opt'] == 'Cosine':
    optimizer = torch.optim.SGD(M_deepvo.parameters(), lr=par.optim['lr'])
    # The schedule is expressed in iterations: T epochs x batches per epoch.
    T_iter = par.optim['T']*len(train_dl)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_iter, eta_min=0, last_epoch=-1)
else:
    # Fail here rather than with a NameError on `optimizer` further down.
    raise ValueError('Unsupported optimizer: {}'.format(par.optim['opt']))

# Load trained DeepVO model and optimizer
if par.resume:
    # Remap checkpoints to the CPU when CUDA is unavailable.
    resume_location = None if use_cuda else 'cpu'
    M_deepvo.load_state_dict(torch.load(par.load_model_path, map_location=resume_location))
    optimizer.load_state_dict(torch.load(par.load_optimizer_path, map_location=resume_location))
    print('Load model from: ', par.load_model_path)
    print('Load optimizer from: ', par.load_optimizer_path)


# Train
# Every epoch: one pass over the training loader with an optimizer step per
# batch, one gradient-free pass over the validation loader, a summary appended
# to the record file, and checkpoints whenever the validation or the training
# mean loss reaches a new minimum.
print('Record loss in: ', par.record_path)
min_loss_t = 1e10
min_loss_v = 1e10
check_interval = 1
# par.optim is a dict; the scheduler is selected by its 'opt' entry.
use_cosine = par.optim['opt'] == 'Cosine'
M_deepvo.train()
for ep in range(par.epochs):
    st_t = time.time()
    print('='*50)
    # Train
    M_deepvo.train()
    loss_mean = 0
    t_loss_list = []
    epoch_length = len(train_dl)
    for i, (_, t_x, t_times, t_y) in enumerate(train_dl):
        if use_cuda:
            t_x = t_x.cuda(non_blocking=par.pin_mem)
            t_y = t_y.cuda(non_blocking=par.pin_mem)
            t_times = t_times.cuda(non_blocking=par.pin_mem)
        ls = float(M_deepvo.step(t_x, t_times, t_y, optimizer).item())
        t_loss_list.append(ls)
        loss_mean += ls
        if use_cosine:
            # The cosine schedule is defined per iteration, so step per batch.
            lr_scheduler.step()
        print(f'Training epoch {ep}: {(100.0*i/epoch_length):.2f}% - Loss = {ls}', end='\r')
    print('\nTrain take {:.1f} sec'.format(time.time()-st_t))
    loss_mean /= len(train_dl)

    # Validation
    st_t = time.time()
    M_deepvo.eval()
    loss_mean_valid = 0
    v_loss_list = []
    valid_length = len(valid_dl)
    # No gradients are needed for validation.
    with torch.no_grad():
        for j, (_, v_x, v_times, v_y) in enumerate(valid_dl):
            # Progress is measured against the validation loader itself.
            print(f'Validating epoch {ep}: {(100.0*j/valid_length):.2f}%', end='\r')
            if use_cuda:
                v_x = v_x.cuda(non_blocking=par.pin_mem)
                v_y = v_y.cuda(non_blocking=par.pin_mem)
                v_times = v_times.cuda(non_blocking=par.pin_mem)
            # Computed on CPU and GPU alike (previously only under use_cuda).
            v_ls = float(M_deepvo.get_loss(v_x, v_times, v_y).item())
            v_loss_list.append(v_ls)
            loss_mean_valid += v_ls
    print('\nValid take {:.1f} sec'.format(time.time()-st_t))
    loss_mean_valid /= len(valid_dl)

    summary = 'Epoch {}\ntrain loss mean: {}, std: {:.2f}\nvalid loss mean: {}, std: {:.2f}\n'.format(ep+1, loss_mean, np.std(t_loss_list), loss_mean_valid, np.std(v_loss_list))
    with open(par.record_path, 'a') as f:
        f.write(summary)
    print(summary)

    # Save model
    # save if the valid loss decrease
    if loss_mean_valid < min_loss_v and ep % check_interval == 0:
        min_loss_v = loss_mean_valid
        print('Save model at ep {}, mean of valid loss: {}'.format(ep+1, loss_mean_valid))  # use 4.6 sec 
        torch.save(M_deepvo.state_dict(), par.save_model_path+'.valid')
        torch.save(optimizer.state_dict(), par.save_optimzer_path+'.valid')
    # save if the training loss decrease
    if loss_mean < min_loss_t and ep % check_interval == 0:
        min_loss_t = loss_mean
        print('Save model at ep {}, mean of train loss: {}'.format(ep+1, loss_mean))
        torch.save(M_deepvo.state_dict(), par.save_model_path+'.train')
        torch.save(optimizer.state_dict(), par.save_optimzer_path+'.train')


Load data info from datainfo/train_df_t000102050809_v04060710_pNone_seq5x7_sample1.pickle
Number of samples in training dataset:  3735
Number of samples in validation dataset:  739
CUDA used.
Record loss in:  records/t000102050809_v04060710_im184x608_s5x7_b8_rnn448_optAdagrad_lr0.0005.txt
Train take 441.8 sec.79% - Loss = 1.3996717929840088
Valid take 31.1 sec 99.79%
Epoch 1
train loss mean: 4.618072225044726, std: 22.94
valid loss mean: 0.289768988778303, std: 0.06

Save model at ep 1, mean of valid loss: 0.289768988778303
Save model at ep 1, mean of train loss: 4.618072225044726
Train take 442.9 sec.79% - Loss = 0.8260985016822815
Valid take 33.5 sec 99.79%
Epoch 2
train loss mean: 1.1337435984816162, std: 0.22
valid loss mean: 0.16532632348301646, std: 0.03

Save model at ep 2, mean of valid loss: 0.16532632348301646
Save model at ep 2, mean of train loss: 1.1337435984816162
Train take 461.5 sec.79% - Loss = 0.69068264961242683
Valid take 31.3 sec 99.79%
Epoch 3
train loss mean: 0.8