# Searching neural architectures by using successive halving

In [6]:
import json 
import os
import numpy as np
import torch 
import torch.nn as nn 
import torch.nn.functional as F 
from catalyst import dl, utils 
from catalyst.contrib.datasets import MNIST 
from sklearn import datasets 
from sklearn.metrics import log_loss 
from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import MinMaxScaler 
from torch import nn as nn 
from torch import optim 
from torch.utils.data import DataLoader, TensorDataset 
torch.manual_seed(0)

<torch._C.Generator at 0x7fd27733d1f0>

In [7]:
class MLP(nn.Module):
    def __init__(self, input_layer_size, output_layer_size, layer_configuration, activation_type='relu'):
        super(MLP, self).__init__()
        self.fully_connected_layers = nn.ModuleDict()
        self.activation_type = activation_type
        hidden_layer_number = 0
        for hidden_layer_idx in range(len(layer_configuration)):
            if hidden_layer_idx == 0:
                self.fully_connected_layers[str(hidden_layer_number)] = nn.Linear(
                    input_layer_size,
                    layer_configuration[hidden_layer_idx]
                )
                hidden_layer_number += 1
            if hidden_layer_idx == len(layer_configuration) - 1:
                self.fully_connected_layers[str(hidden_layer_number)] = nn.Linear(
                    layer_configuration[hidden_layer_idx],
                    output_layer_size
                )
            else:
                self.fully_connected_layers[str(hidden_layer_number)] = nn.Linear(
                    layer_configuration[hidden_layer_idx],
                    layer_configuration[hidden_layer_idx+1]
                )
                hidden_layer_number += 1
                
    def forward(self, x):
        for fc_key in self.fully_connected_layers:
            x = self.fully_connected_layers[fc_key](x)
            if fc_key != str(len(self.fully_connected_layers) -1):
                x = F.relu(x)
            else:
                x = F.softmax(x)
        return x

In [8]:
def train_and_evaluate_mlp(
    trial_number, 
    layer_configuration,
    epochs, 
    input_layer_size,
    output_layer_size,
    load_on_stage_start=False,
    best_or_last='last',
    verbose=False
):
    criterion = nn.CrossEntropyLoss()
    runner = dl.SupervisedRunner(
        input_key="features", output_key="logits", target_key="targets", loss_key="loss"
    )
    model = MLP(
        input_layer_size=input_layer_size,
        layer_configuration=layer_configuration,
        output_layer_size=output_layer_size,
    )
    optimizer = optim.Adam(model.parameters(), lr=0.02)
    checkpoint_logdir = "logs/trial_{}".format(trial_number)
    runner.train(
        model=model, 
        criterion=criterion,
        optimizer=optimizer, 
        loaders=loaders,
        num_epochs=epochs,
        callbacks=[
            dl.CheckpointCallback(
                logdir=checkpoint_logdir,
                loader_key="valid",
                metric_key="loss",
                mode="all",
                load_on_stage_start="last_full" if load_on_stage_start else None,
            )
        ],
        logdir="./logs",
        valid_loader="valid",
        valid_metric="loss", minimize_valid_metric=True,
        verbose=verbose
    )
    with open(os.path.join(checkpoint_logdir, '_metrics.json'), 'r') as f:
        metrics = json.load(f)
    if best_or_last == 'last':
        valid_loss = metrics['last']['_score_']
    else:
        valid_loss = metrics['best']['valid']['loss']
    return valid_loss

In [9]:
def get_random_configurations(number_of_configurations, rng):
    layer_configurations = []
    for _ in range(number_of_configurations):
        layer_configuration = []
        number_of_hidden_layers = rng.randint(low=1, high=6)
        for _ in range(number_of_hidden_layers):
            layer_configuration.append(
            rng.randint(low=2, high=100)
            )
            layer_configurations.append(layer_configuration)
    layer_configurations = np.array(layer_configurations)
    return layer_configurations

In [10]:
iris = datasets.load_iris()
iris_input_dataset = iris['data']
target = torch.from_numpy(iris['target'])
scaler = MinMaxScaler()
scaler.fit(iris_input_dataset)
iris_input_dataset = torch.from_numpy(
    scaler.transform(iris_input_dataset)
).float()
(X_train, X_test, y_train, y_test) = train_test_split(
    iris_input_dataset, target, test_size=0.33, random_state=42
)
training_dataset = TensorDataset(X_train, y_train)
validation_dataset = TensorDataset(X_test, y_test)
train_loader = DataLoader(
    training_dataset, batch_size=10, num_workers=1
)
valid_loader = DataLoader(
    validation_dataset, batch_size=10, num_workers=1
)
loaders = {"train": train_loader, "valid": valid_loader}


In [35]:
rng = np.random.RandomState(1234)
number_of_configurations = 20
layer_configurations = get_random_configurations(
    number_of_configurations, rng
)
successive_halving_epochs = [5, 5, 5]


  layer_configurations = np.array(layer_configurations)


In [36]:
layer_configurations

array([list([40, 55, 78, 26]), list([40, 55, 78, 26]),
       list([40, 55, 78, 26]), list([40, 55, 78, 26]), list([17, 51]),
       list([17, 51]), list([28, 32]), list([28, 32]),
       list([32, 28, 60, 94]), list([32, 28, 60, 94]),
       list([32, 28, 60, 94]), list([32, 28, 60, 94]), list([75]),
       list([78, 39, 36]), list([78, 39, 36]), list([78, 39, 36]),
       list([69, 13, 2]), list([69, 13, 2]), list([69, 13, 2]),
       list([77, 82]), list([77, 82]), list([4, 21, 14, 67]),
       list([4, 21, 14, 67]), list([4, 21, 14, 67]),
       list([4, 21, 14, 67]), list([83, 16, 73, 62]),
       list([83, 16, 73, 62]), list([83, 16, 73, 62]),
       list([83, 16, 73, 62]), list([48, 30, 83]), list([48, 30, 83]),
       list([48, 30, 83]), list([98, 14, 71, 97, 33]),
       list([98, 14, 71, 97, 33]), list([98, 14, 71, 97, 33]),
       list([98, 14, 71, 97, 33]), list([98, 14, 71, 97, 33]),
       list([91, 86]), list([91, 86]), list([43]), list([58]),
       list([72, 58, 88, 46

In [46]:
trial_numbers = np.array(range(len(layer_configurations)))
for succesive_idx, successive_halving_epoch in enumerate(successive_halving_epochs):
    valid_losses = []
    for idx, layer_configuration in enumerate(layer_configurations):
        trial_number = trial_numbers[idx]
        valid_loss = train_and_evaluate_mlp(
            trial_number, 
            input_layer_size=iris_input_dataset.shape[1], 
            output_layer_size=3,
            layer_configuration=layer_configuration,
            epochs=successive_halving_epoch,
            load_on_stage_start=False if succesive_idx==0 else True
        )
        valid_losses.append(valid_loss)
    if succesive_idx != len(successive_halving_epochs) - 1:
        succesive_halved_configurations = np.argsort(valid_losses)[:int(len(valid_losses)/2)]
        layer_configurations = layer_configurations[
            succesive_halved_configurations
        ]
        trial_numbers = trial_numbers[
            succesive_halved_configurations
        ]


  x = F.softmax(x)


train (1/5) loss: 1.027400243282318 | loss/mean: 1.027400243282318 | loss/std: 0.0779187769744518 | lr: 0.02 | momentum: 0.9
valid (1/5) loss: 0.8113264918327332 | loss/mean: 0.8113264918327332 | loss/std: 0.08662349382978496 | lr: 0.02 | momentum: 0.9
* Epoch (1/5) 
train (2/5) loss: 0.8243094205856323 | loss/mean: 0.8243094205856323 | loss/std: 0.060180884949187455 | lr: 0.02 | momentum: 0.9
valid (2/5) loss: 0.7436487555503846 | loss/mean: 0.7436487555503846 | loss/std: 0.08704131286976208 | lr: 0.02 | momentum: 0.9
* Epoch (2/5) 
train (3/5) loss: 0.6817425072193144 | loss/mean: 0.6817425072193144 | loss/std: 0.07159089520789767 | lr: 0.02 | momentum: 0.9
valid (3/5) loss: 0.5746945381164551 | loss/mean: 0.5746945381164551 | loss/std: 0.02605368067605671 | lr: 0.02 | momentum: 0.9
* Epoch (3/5) 
train (4/5) loss: 0.6983911335468292 | loss/mean: 0.6983911335468292 | loss/std: 0.1832744057593382 | lr: 0.02 | momentum: 0.9
valid (4/5) loss: 0.7424887418746948 | loss/mean: 0.7424887418

valid (5/5) loss: 0.5785036325454712 | loss/mean: 0.5785036325454712 | loss/std: 0.030418574688565656 | lr: 0.02 | momentum: 0.9
* Epoch (5/5) 
Top best models:
logs/trial_5/train.5.pth	0.5785
train (1/5) loss: 1.0195854365825652 | loss/mean: 1.0195854365825652 | loss/std: 0.08269525133056098 | lr: 0.02 | momentum: 0.9
valid (1/5) loss: 0.8041297197341919 | loss/mean: 0.8041297197341919 | loss/std: 0.13818679676860227 | lr: 0.02 | momentum: 0.9
* Epoch (1/5) 
train (2/5) loss: 0.8240260422229767 | loss/mean: 0.8240260422229767 | loss/std: 0.047567963979279884 | lr: 0.02 | momentum: 0.9
valid (2/5) loss: 0.765532660484314 | loss/mean: 0.765532660484314 | loss/std: 0.10660979907838541 | lr: 0.02 | momentum: 0.9
* Epoch (2/5) 
train (3/5) loss: 0.765397322177887 | loss/mean: 0.765397322177887 | loss/std: 0.10430373544679697 | lr: 0.02 | momentum: 0.9
valid (3/5) loss: 0.5982219576835632 | loss/mean: 0.5982219576835632 | loss/std: 0.030304099478018758 | lr: 0.02 | momentum: 0.9
* Epoch (3/

train (5/5) loss: 0.6205336928367615 | loss/mean: 0.6205336928367615 | loss/std: 0.06386394934882783 | lr: 0.02 | momentum: 0.9
valid (5/5) loss: 0.5861230731010437 | loss/mean: 0.5861230731010437 | loss/std: 0.05499827930587826 | lr: 0.02 | momentum: 0.9
* Epoch (5/5) 
Top best models:
logs/trial_11/train.5.pth	0.5861
train (1/5) loss: 1.0067299783229828 | loss/mean: 1.0067299783229828 | loss/std: 0.08846333420405453 | lr: 0.02 | momentum: 0.9
valid (1/5) loss: 0.790772306919098 | loss/mean: 0.790772306919098 | loss/std: 0.12917237030046028 | lr: 0.02 | momentum: 0.9
* Epoch (1/5) 
train (2/5) loss: 0.763530445098877 | loss/mean: 0.763530445098877 | loss/std: 0.07420942254634531 | lr: 0.02 | momentum: 0.9
valid (2/5) loss: 0.7069389700889588 | loss/mean: 0.7069389700889588 | loss/std: 0.1059019504419259 | lr: 0.02 | momentum: 0.9
* Epoch (2/5) 
train (3/5) loss: 0.6836718142032623 | loss/mean: 0.6836718142032623 | loss/std: 0.053182049919354484 | lr: 0.02 | momentum: 0.9
valid (3/5) l

train (3/5) loss: 0.6638736128807068 | loss/mean: 0.6638736128807068 | loss/std: 0.09588428928968722 | lr: 0.02 | momentum: 0.9
valid (3/5) loss: 0.5710262298583985 | loss/mean: 0.5710262298583985 | loss/std: 0.039459398772705365 | lr: 0.02 | momentum: 0.9
* Epoch (3/5) 
train (4/5) loss: 0.6414228141307831 | loss/mean: 0.6414228141307831 | loss/std: 0.1041334886610407 | lr: 0.02 | momentum: 0.9
valid (4/5) loss: 0.5591137528419494 | loss/mean: 0.5591137528419494 | loss/std: 0.014152686473565086 | lr: 0.02 | momentum: 0.9
* Epoch (4/5) 
train (5/5) loss: 0.7585746884346009 | loss/mean: 0.7585746884346009 | loss/std: 0.14328182299098394 | lr: 0.02 | momentum: 0.9
valid (5/5) loss: 0.7714681267738343 | loss/mean: 0.7714681267738343 | loss/std: 0.13407366195673923 | lr: 0.02 | momentum: 0.9
* Epoch (5/5) 
Top best models:
logs/trial_2/train.4.pth	0.5591
=> Loading checkpoint logs/trial_3/last_full.pth
full checkpoint logs/trial_3/last_full.pth loaded (global epoch 5, stage train, epoch 5)

train (5/5) loss: 1.013823425769806 | loss/mean: 1.013823425769806 | loss/std: 0.17169892860617933 | lr: 0.02 | momentum: 0.9
valid (5/5) loss: 0.9318407058715821 | loss/mean: 0.9318407058715821 | loss/std: 0.18503469369154948 | lr: 0.02 | momentum: 0.9
* Epoch (5/5) 
Top best models:
logs/trial_0/train.2.pth	0.5528
=> Loading checkpoint logs/trial_1/last_full.pth
full checkpoint logs/trial_1/last_full.pth loaded (global epoch 10, stage train, epoch 5)
train (1/5) loss: 0.6725760400295258 | loss/mean: 0.6725760400295258 | loss/std: 0.07435665795370366 | lr: 0.02 | momentum: 0.9
valid (1/5) loss: 0.5893844366073608 | loss/mean: 0.5893844366073608 | loss/std: 0.04683240492347999 | lr: 0.02 | momentum: 0.9
* Epoch (1/5) 
train (2/5) loss: 0.5935502707958221 | loss/mean: 0.5935502707958221 | loss/std: 0.0635230975687885 | lr: 0.02 | momentum: 0.9
valid (2/5) loss: 0.552819037437439 | loss/mean: 0.552819037437439 | loss/std: 0.0014933981954153071 | lr: 0.02 | momentum: 0.9
* Epoch (2/5) 
tr

In [39]:
best_loss_idx = np.argmin(valid_losses)
best_layer_configuration = layer_configurations[best_loss_idx]
best_loss_trial_number = trial_numbers[best_loss_idx]


# Searching neural architectures by using Hyperband


In [48]:
import math


In [49]:
resource_per_conf = 30 # R
N = 3

In [51]:
r_i

1

In [52]:
s_max = int(math.log(resource_per_conf, N))
bracket_resource = (s_max + 1) * resource_per_conf 
bracket_best_valid_losses = []
bracket_best_layer_configuration = []
for s in range(s_max, -1, -1):
    number_of_configurations = int(
        (bracket_resource / resource_per_conf) * (N**s / (s+1))
    )
    r = resource_per_conf * N**-s
    layer_configurations = get_random_configurations(
        number_of_configurations, rng
    )
    trial_numbers = np.array(
        range(len(layer_configurations))
    )
    valid_losses = []
    for i in range(s+1):
        number_of_configurations_i = int(
        number_of_configurations * N**-i
        )
        r_i = int(r * N**i)
        valid_losses = []
        for idx, layer_configuration in enumerate(layer_configurations):
            trial_number = '{}_{}'.format(s, trial_numbers[idx])
            valid_loss = train_and_evaluate_mlp(
                trial_number, 
                input_layer_size=iris_input_dataset.shape[1], 
                output_layer_size=3,
                layer_configuration=layer_configuration,
                epochs=r_i, 
                load_on_stage_start=False if s==s_max else True
            )
            valid_losses.append(valid_loss)
            if succesive_idx != len(successive_halving_epochs) - 1:
                succesive_halved_configurations = np.argsort(
                valid_losses
                )[:int(number_of_configurations_i/N)]
                layer_configurations = layer_configurations[
                succesive_halved_configurations
                ]
                trial_numbers = trial_numbers[
                succesive_halved_configurations
                ]
                best_loss_idx = np.argmin(valid_losses)
                best_layer_configuration = layer_configurations[best_loss_idx]
                best_loss_trial_number = trial_numbers[best_loss_idx]
                bracket_best_valid_losses.append(
                    valid_losses[best_loss_idx]
                )
                bracket_best_layer_configuration.append(
                    best_layer_configuration
                )

  layer_configurations = np.array(layer_configurations)
  x = F.softmax(x)


train (1/1) loss: 1.051391726732254 | loss/mean: 1.051391726732254 | loss/std: 0.0539550765737218 | lr: 0.02 | momentum: 0.9
valid (1/1) loss: 0.8299834251403808 | loss/mean: 0.8299834251403808 | loss/std: 0.08110294077353315 | lr: 0.02 | momentum: 0.9
* Epoch (1/1) 
Top best models:
logs/trial_3_0/train.1.pth	0.8300
train (1/1) loss: 1.0814664721488954 | loss/mean: 1.0814664721488954 | loss/std: 0.027893235685592137 | lr: 0.02 | momentum: 0.9
valid (1/1) loss: 0.9067293524742126 | loss/mean: 0.9067293524742126 | loss/std: 0.07792100873397796 | lr: 0.02 | momentum: 0.9
* Epoch (1/1) 
Top best models:
logs/trial_3_1/train.1.pth	0.9067
train (1/1) loss: 1.0814664721488954 | loss/mean: 1.0814664721488954 | loss/std: 0.027893235685592137 | lr: 0.02 | momentum: 0.9
valid (1/1) loss: 0.9067293524742126 | loss/mean: 0.9067293524742126 | loss/std: 0.07792100873397796 | lr: 0.02 | momentum: 0.9
* Epoch (1/1) 
Top best models:
logs/trial_3_2/train.1.pth	0.9067
train (1/1) loss: 1.081466472148895

Top best models:
logs/trial_3_25/train.1.pth	0.9016
train (1/1) loss: 1.0620781064033507 | loss/mean: 1.0620781064033507 | loss/std: 0.07332028201210686 | lr: 0.02 | momentum: 0.9
valid (1/1) loss: 0.901595139503479 | loss/mean: 0.901595139503479 | loss/std: 0.09546978508254522 | lr: 0.02 | momentum: 0.9
* Epoch (1/1) 
Top best models:
logs/trial_3_26/train.1.pth	0.9016
train (1/1) loss: 1.0620781064033507 | loss/mean: 1.0620781064033507 | loss/std: 0.07332028201210686 | lr: 0.02 | momentum: 0.9
valid (1/1) loss: 0.901595139503479 | loss/mean: 0.901595139503479 | loss/std: 0.09546978508254522 | lr: 0.02 | momentum: 0.9
* Epoch (1/1) 
Top best models:
logs/trial_3_27/train.1.pth	0.9016
train (1/1) loss: 1.0634414911270142 | loss/mean: 1.0634414911270142 | loss/std: 0.022463030352864403 | lr: 0.02 | momentum: 0.9
valid (1/1) loss: 1.021214985847473 | loss/mean: 1.021214985847473 | loss/std: 0.00976961052483376 | lr: 0.02 | momentum: 0.9
* Epoch (1/1) 
Top best models:
logs/trial_3_28/tra

valid (1/1) loss: 0.9599687457084656 | loss/mean: 0.9599687457084656 | loss/std: 0.057690952672707616 | lr: 0.02 | momentum: 0.9
* Epoch (1/1) 
Top best models:
logs/trial_3_51/train.1.pth	0.9600
train (1/1) loss: 1.0636008858680728 | loss/mean: 1.0636008858680728 | loss/std: 0.030568292680545273 | lr: 0.02 | momentum: 0.9
valid (1/1) loss: 0.9630955457687378 | loss/mean: 0.9630955457687378 | loss/std: 0.031549133933960964 | lr: 0.02 | momentum: 0.9
* Epoch (1/1) 
Top best models:
logs/trial_3_52/train.1.pth	0.9631
train (1/1) loss: 1.0636008858680728 | loss/mean: 1.0636008858680728 | loss/std: 0.030568292680545273 | lr: 0.02 | momentum: 0.9
valid (1/1) loss: 0.9630955457687378 | loss/mean: 0.9630955457687378 | loss/std: 0.031549133933960964 | lr: 0.02 | momentum: 0.9
* Epoch (1/1) 
Top best models:
logs/trial_3_53/train.1.pth	0.9631
train (1/1) loss: 1.0025716722011566 | loss/mean: 1.0025716722011566 | loss/std: 0.08647023610162505 | lr: 0.02 | momentum: 0.9
valid (1/1) loss: 0.809821

train (1/1) loss: 1.0742929458618162 | loss/mean: 1.0742929458618162 | loss/std: 0.029582615433299916 | lr: 0.02 | momentum: 0.9
valid (1/1) loss: 0.954572606086731 | loss/mean: 0.954572606086731 | loss/std: 0.06602881935608881 | lr: 0.02 | momentum: 0.9
* Epoch (1/1) 
Top best models:
logs/trial_3_77/train.1.pth	0.9546
train (1/1) loss: 1.0742929458618162 | loss/mean: 1.0742929458618162 | loss/std: 0.029582615433299916 | lr: 0.02 | momentum: 0.9
valid (1/1) loss: 0.954572606086731 | loss/mean: 0.954572606086731 | loss/std: 0.06602881935608881 | lr: 0.02 | momentum: 0.9
* Epoch (1/1) 
Top best models:
logs/trial_3_78/train.1.pth	0.9546
train (1/1) loss: 1.0742929458618162 | loss/mean: 1.0742929458618162 | loss/std: 0.029582615433299916 | lr: 0.02 | momentum: 0.9
valid (1/1) loss: 0.954572606086731 | loss/mean: 0.954572606086731 | loss/std: 0.06602881935608881 | lr: 0.02 | momentum: 0.9
* Epoch (1/1) 
Top best models:
logs/trial_3_79/train.1.pth	0.9546
train (1/3) loss: 1.08146647214889

valid (2/3) loss: 0.7232501268386841 | loss/mean: 0.7232501268386841 | loss/std: 0.11895825687949907 | lr: 0.02 | momentum: 0.9
* Epoch (2/3) 
train (3/3) loss: 0.6206634521484375 | loss/mean: 0.6206634521484375 | loss/std: 0.05114971832475455 | lr: 0.02 | momentum: 0.9
valid (3/3) loss: 0.688126015663147 | loss/mean: 0.688126015663147 | loss/std: 0.1118424368812868 | lr: 0.02 | momentum: 0.9
* Epoch (3/3) 
Top best models:
logs/trial_3_8/train.3.pth	0.6881
train (1/3) loss: 0.9909456312656403 | loss/mean: 0.9909456312656403 | loss/std: 0.06749367465289 | lr: 0.02 | momentum: 0.9
valid (1/3) loss: 0.832857072353363 | loss/mean: 0.832857072353363 | loss/std: 0.0880200461176085 | lr: 0.02 | momentum: 0.9
* Epoch (1/3) 
train (2/3) loss: 0.8158363103866577 | loss/mean: 0.8158363103866577 | loss/std: 0.04578404675850827 | lr: 0.02 | momentum: 0.9
valid (2/3) loss: 0.7439068675041198 | loss/mean: 0.7439068675041198 | loss/std: 0.0638884975347535 | lr: 0.02 | momentum: 0.9
* Epoch (2/3) 
tra

train (1/3) loss: 1.0670855641365051 | loss/mean: 1.0670855641365051 | loss/std: 0.060743797626086664 | lr: 0.02 | momentum: 0.9
valid (1/3) loss: 0.8636133074760437 | loss/mean: 0.8636133074760437 | loss/std: 0.09040922684473221 | lr: 0.02 | momentum: 0.9
* Epoch (1/3) 
train (2/3) loss: 0.8315803050994872 | loss/mean: 0.8315803050994872 | loss/std: 0.07577417940949954 | lr: 0.02 | momentum: 0.9
valid (2/3) loss: 0.7643857717514038 | loss/mean: 0.7643857717514038 | loss/std: 0.13391919133844757 | lr: 0.02 | momentum: 0.9
* Epoch (2/3) 
train (3/3) loss: 0.670519894361496 | loss/mean: 0.670519894361496 | loss/std: 0.07979384121278028 | lr: 0.02 | momentum: 0.9
valid (3/3) loss: 0.6200530409812928 | loss/mean: 0.6200530409812928 | loss/std: 0.060327595476010035 | lr: 0.02 | momentum: 0.9
* Epoch (3/3) 
Top best models:
logs/trial_3_18/train.3.pth	0.6201
train (1/3) loss: 1.0670855641365051 | loss/mean: 1.0670855641365051 | loss/std: 0.060743797626086664 | lr: 0.02 | momentum: 0.9
valid 

train (3/3) loss: 0.8180306613445282 | loss/mean: 0.8180306613445282 | loss/std: 0.11844803908235249 | lr: 0.02 | momentum: 0.9
valid (3/3) loss: 0.7199756145477295 | loss/mean: 0.7199756145477295 | loss/std: 0.1486231114634434 | lr: 0.02 | momentum: 0.9
* Epoch (3/3) 
Top best models:
logs/trial_3_27/train.3.pth	0.7200
train (1/3) loss: 1.0624918937683105 | loss/mean: 1.0624918937683105 | loss/std: 0.037580839803150225 | lr: 0.02 | momentum: 0.9
valid (1/3) loss: 1.0096192359924316 | loss/mean: 1.0096192359924316 | loss/std: 0.03388443513250282 | lr: 0.02 | momentum: 0.9
* Epoch (1/3) 
train (2/3) loss: 0.9622569799423218 | loss/mean: 0.9622569799423218 | loss/std: 0.05483076132730155 | lr: 0.02 | momentum: 0.9
valid (2/3) loss: 0.8658477783203125 | loss/mean: 0.8658477783203125 | loss/std: 0.0961587246216078 | lr: 0.02 | momentum: 0.9
* Epoch (2/3) 
train (3/3) loss: 0.8591929078102112 | loss/mean: 0.8591929078102112 | loss/std: 0.0535840524564391 | lr: 0.02 | momentum: 0.9
valid (3/

valid (1/3) loss: 0.8062900424003601 | loss/mean: 0.8062900424003601 | loss/std: 0.14573709095643694 | lr: 0.02 | momentum: 0.9
* Epoch (1/3) 
train (2/3) loss: 0.8417986452579498 | loss/mean: 0.8417986452579498 | loss/std: 0.06539515043305431 | lr: 0.02 | momentum: 0.9
valid (2/3) loss: 0.7862774133682251 | loss/mean: 0.7862774133682251 | loss/std: 0.11010222441249054 | lr: 0.02 | momentum: 0.9
* Epoch (2/3) 
train (3/3) loss: 0.8120333373546601 | loss/mean: 0.8120333373546601 | loss/std: 0.061196653342135296 | lr: 0.02 | momentum: 0.9
valid (3/3) loss: 0.7755308628082276 | loss/mean: 0.7755308628082276 | loss/std: 0.1355782953930281 | lr: 0.02 | momentum: 0.9
* Epoch (3/3) 
Top best models:
logs/trial_3_37/train.3.pth	0.7755
train (1/3) loss: 0.999441921710968 | loss/mean: 0.999441921710968 | loss/std: 0.09624388253381844 | lr: 0.02 | momentum: 0.9
valid (1/3) loss: 0.8062900424003601 | loss/mean: 0.8062900424003601 | loss/std: 0.14573709095643694 | lr: 0.02 | momentum: 0.9
* Epoch (

train (3/3) loss: 0.804503345489502 | loss/mean: 0.804503345489502 | loss/std: 0.0818480579393475 | lr: 0.02 | momentum: 0.9
valid (3/3) loss: 0.6718388795852661 | loss/mean: 0.6718388795852661 | loss/std: 0.10255330939498926 | lr: 0.02 | momentum: 0.9
* Epoch (3/3) 
Top best models:
logs/trial_3_46/train.3.pth	0.6718
train (1/3) loss: 1.1046412944793702 | loss/mean: 1.1046412944793702 | loss/std: 0.018038048937616808 | lr: 0.02 | momentum: 0.9
valid (1/3) loss: 1.0977694988250732 | loss/mean: 1.0977694988250732 | loss/std: 0.0048224280023736275 | lr: 0.02 | momentum: 0.9
* Epoch (1/3) 
train (2/3) loss: 1.0704372882843018 | loss/mean: 1.0704372882843018 | loss/std: 0.03610918413269728 | lr: 0.02 | momentum: 0.9
valid (2/3) loss: 0.8587780117988586 | loss/mean: 0.8587780117988586 | loss/std: 0.06977666246427185 | lr: 0.02 | momentum: 0.9
* Epoch (2/3) 
train (3/3) loss: 0.804503345489502 | loss/mean: 0.804503345489502 | loss/std: 0.0818480579393475 | lr: 0.02 | momentum: 0.9
valid (3/3

valid (1/3) loss: 0.8349678993225098 | loss/mean: 0.8349678993225098 | loss/std: 0.11659642629789257 | lr: 0.02 | momentum: 0.9
* Epoch (1/3) 
train (2/3) loss: 0.8136796414852142 | loss/mean: 0.8136796414852142 | loss/std: 0.04494788734535127 | lr: 0.02 | momentum: 0.9
valid (2/3) loss: 0.7351164817810059 | loss/mean: 0.7351164817810059 | loss/std: 0.05467158015319404 | lr: 0.02 | momentum: 0.9
* Epoch (2/3) 
train (3/3) loss: 0.7232986032962799 | loss/mean: 0.7232986032962799 | loss/std: 0.0368016617551212 | lr: 0.02 | momentum: 0.9
valid (3/3) loss: 0.6741612553596497 | loss/mean: 0.6741612553596497 | loss/std: 0.04305730022005385 | lr: 0.02 | momentum: 0.9
* Epoch (3/3) 
Top best models:
logs/trial_3_56/train.3.pth	0.6742
train (1/3) loss: 1.0965473294258117 | loss/mean: 1.0965473294258117 | loss/std: 0.03417323765081367 | lr: 0.02 | momentum: 0.9
valid (1/3) loss: 1.0690630435943604 | loss/mean: 1.0690630435943604 | loss/std: 0.035063881651995005 | lr: 0.02 | momentum: 0.9
* Epoch

train (3/3) loss: 0.8305530726909638 | loss/mean: 0.8305530726909638 | loss/std: 0.04796676855422323 | lr: 0.02 | momentum: 0.9
valid (3/3) loss: 0.7669692516326905 | loss/mean: 0.7669692516326905 | loss/std: 0.09465822630905914 | lr: 0.02 | momentum: 0.9
* Epoch (3/3) 
Top best models:
logs/trial_3_65/train.3.pth	0.7670
train (1/3) loss: 1.0040422558784485 | loss/mean: 1.0040422558784485 | loss/std: 0.10481174949661555 | lr: 0.02 | momentum: 0.9
valid (1/3) loss: 0.8364074349403381 | loss/mean: 0.8364074349403381 | loss/std: 0.18836333794209817 | lr: 0.02 | momentum: 0.9
* Epoch (1/3) 
train (2/3) loss: 0.8683073282241821 | loss/mean: 0.8683073282241821 | loss/std: 0.07866512061680729 | lr: 0.02 | momentum: 0.9
valid (2/3) loss: 0.8049206376075745 | loss/mean: 0.8049206376075745 | loss/std: 0.14869401118251624 | lr: 0.02 | momentum: 0.9
* Epoch (2/3) 
train (3/3) loss: 0.8305530726909638 | loss/mean: 0.8305530726909638 | loss/std: 0.04796676855422323 | lr: 0.02 | momentum: 0.9
valid (

valid (1/3) loss: 0.8128466367721557 | loss/mean: 0.8128466367721557 | loss/std: 0.10897092209182026 | lr: 0.02 | momentum: 0.9
* Epoch (1/3) 
train (2/3) loss: 0.7762244999408722 | loss/mean: 0.7762244999408722 | loss/std: 0.05153957756917858 | lr: 0.02 | momentum: 0.9
valid (2/3) loss: 0.6667577624320984 | loss/mean: 0.6667577624320984 | loss/std: 0.04650856727078501 | lr: 0.02 | momentum: 0.9
* Epoch (2/3) 
train (3/3) loss: 0.6404120504856109 | loss/mean: 0.6404120504856109 | loss/std: 0.03424296602799238 | lr: 0.02 | momentum: 0.9
valid (3/3) loss: 0.6319876551628113 | loss/mean: 0.6319876551628113 | loss/std: 0.07467074758097611 | lr: 0.02 | momentum: 0.9
* Epoch (3/3) 
Top best models:
logs/trial_3_75/train.3.pth	0.6320
train (1/3) loss: 0.9844659924507142 | loss/mean: 0.9844659924507142 | loss/std: 0.0877576244616751 | lr: 0.02 | momentum: 0.9
valid (1/3) loss: 0.8128466367721557 | loss/mean: 0.8128466367721557 | loss/std: 0.10897092209182026 | lr: 0.02 | momentum: 0.9
* Epoch 

valid (5/10) loss: 1.171443247795105 | loss/mean: 1.171443247795105 | loss/std: 0.1851624349058624 | lr: 0.02 | momentum: 0.9
* Epoch (5/10) 
train (6/10) loss: 1.2414449214935306 | loss/mean: 1.2414449214935306 | loss/std: 0.1452966486891931 | lr: 0.02 | momentum: 0.9
valid (6/10) loss: 1.1714448928833008 | loss/mean: 1.1714448928833008 | loss/std: 0.18516406672882949 | lr: 0.02 | momentum: 0.9
* Epoch (6/10) 
train (7/10) loss: 1.2414449214935306 | loss/mean: 1.2414449214935306 | loss/std: 0.1452966486891931 | lr: 0.02 | momentum: 0.9
valid (7/10) loss: 1.1714448928833008 | loss/mean: 1.1714448928833008 | loss/std: 0.18516406672882949 | lr: 0.02 | momentum: 0.9
* Epoch (7/10) 
train (8/10) loss: 1.2414449214935306 | loss/mean: 1.2414449214935306 | loss/std: 0.1452966486891931 | lr: 0.02 | momentum: 0.9
valid (8/10) loss: 1.1714448928833008 | loss/mean: 1.1714448928833008 | loss/std: 0.18516406672882949 | lr: 0.02 | momentum: 0.9
* Epoch (8/10) 
train (9/10) loss: 1.2414449214935306 |

train (5/10) loss: 0.6182107985019684 | loss/mean: 0.6182107985019684 | loss/std: 0.07429524521892625 | lr: 0.02 | momentum: 0.9
valid (5/10) loss: 0.6240852475166321 | loss/mean: 0.6240852475166321 | loss/std: 0.08562663279987459 | lr: 0.02 | momentum: 0.9
* Epoch (5/10) 
train (6/10) loss: 0.6353544831275939 | loss/mean: 0.6353544831275939 | loss/std: 0.06721044282579969 | lr: 0.02 | momentum: 0.9
valid (6/10) loss: 0.5929071068763733 | loss/mean: 0.5929071068763733 | loss/std: 0.03645937037753808 | lr: 0.02 | momentum: 0.9
* Epoch (6/10) 
train (7/10) loss: 0.6077215075492859 | loss/mean: 0.6077215075492859 | loss/std: 0.054967542381635426 | lr: 0.02 | momentum: 0.9
valid (7/10) loss: 0.5848313450813294 | loss/mean: 0.5848313450813294 | loss/std: 0.02922986725767646 | lr: 0.02 | momentum: 0.9
* Epoch (7/10) 
train (8/10) loss: 0.6015100121498108 | loss/mean: 0.6015100121498108 | loss/std: 0.046516831719464365 | lr: 0.02 | momentum: 0.9
valid (8/10) loss: 0.5796471118927002 | loss/me

valid (4/10) loss: 0.7565695524215699 | loss/mean: 0.7565695524215699 | loss/std: 0.12903945590960691 | lr: 0.02 | momentum: 0.9
* Epoch (4/10) 
train (5/10) loss: 0.7085417926311494 | loss/mean: 0.7085417926311494 | loss/std: 0.0870106773641732 | lr: 0.02 | momentum: 0.9
valid (5/10) loss: 0.6169735312461853 | loss/mean: 0.6169735312461853 | loss/std: 0.07215264800028112 | lr: 0.02 | momentum: 0.9
* Epoch (5/10) 
train (6/10) loss: 0.6204716622829437 | loss/mean: 0.6204716622829437 | loss/std: 0.057492224655466904 | lr: 0.02 | momentum: 0.9
valid (6/10) loss: 0.5813478708267212 | loss/mean: 0.5813478708267212 | loss/std: 0.03981003190846282 | lr: 0.02 | momentum: 0.9
* Epoch (6/10) 
train (7/10) loss: 0.5881726145744324 | loss/mean: 0.5881726145744324 | loss/std: 0.04608421274225809 | lr: 0.02 | momentum: 0.9
valid (7/10) loss: 0.5836187601089478 | loss/mean: 0.5836187601089478 | loss/std: 0.041041117065106374 | lr: 0.02 | momentum: 0.9
* Epoch (7/10) 
train (8/10) loss: 0.62074356675

train (4/10) loss: 0.6184682309627533 | loss/mean: 0.6184682309627533 | loss/std: 0.052895150777958234 | lr: 0.02 | momentum: 0.9
valid (4/10) loss: 0.6381049990653992 | loss/mean: 0.6381049990653992 | loss/std: 0.07885579987805978 | lr: 0.02 | momentum: 0.9
* Epoch (4/10) 
train (5/10) loss: 0.6300998330116272 | loss/mean: 0.6300998330116272 | loss/std: 0.053420659357080236 | lr: 0.02 | momentum: 0.9
valid (5/10) loss: 0.5912232756614685 | loss/mean: 0.5912232756614685 | loss/std: 0.04872399718741284 | lr: 0.02 | momentum: 0.9
* Epoch (5/10) 
train (6/10) loss: 0.6750606238842011 | loss/mean: 0.6750606238842011 | loss/std: 0.08358640419691801 | lr: 0.02 | momentum: 0.9
valid (6/10) loss: 0.6490066885948181 | loss/mean: 0.6490066885948181 | loss/std: 0.0811349765324866 | lr: 0.02 | momentum: 0.9
* Epoch (6/10) 
train (7/10) loss: 0.6516779124736787 | loss/mean: 0.6516779124736787 | loss/std: 0.11906922478015003 | lr: 0.02 | momentum: 0.9
valid (7/10) loss: 0.6326017141342163 | loss/mea

valid (3/10) loss: 0.6249504804611206 | loss/mean: 0.6249504804611206 | loss/std: 0.06726345559207497 | lr: 0.02 | momentum: 0.9
* Epoch (3/10) 
train (4/10) loss: 0.6772634387016297 | loss/mean: 0.6772634387016297 | loss/std: 0.12740538748476352 | lr: 0.02 | momentum: 0.9
valid (4/10) loss: 0.5953487396240235 | loss/mean: 0.5953487396240235 | loss/std: 0.04027497366771443 | lr: 0.02 | momentum: 0.9
* Epoch (4/10) 
train (5/10) loss: 0.6248974740505219 | loss/mean: 0.6248974740505219 | loss/std: 0.06650668754781668 | lr: 0.02 | momentum: 0.9
valid (5/10) loss: 0.69268878698349 | loss/mean: 0.69268878698349 | loss/std: 0.10384862446077955 | lr: 0.02 | momentum: 0.9
* Epoch (5/10) 
train (6/10) loss: 0.6759368777275085 | loss/mean: 0.6759368777275085 | loss/std: 0.1048687980736014 | lr: 0.02 | momentum: 0.9
valid (6/10) loss: 0.660342025756836 | loss/mean: 0.660342025756836 | loss/std: 0.09733581773266917 | lr: 0.02 | momentum: 0.9
* Epoch (6/10) 
train (7/10) loss: 0.6845036923885345 | 

train (3/10) loss: 0.8352946102619172 | loss/mean: 0.8352946102619172 | loss/std: 0.06578662090051053 | lr: 0.02 | momentum: 0.9
valid (3/10) loss: 0.8046453356742859 | loss/mean: 0.8046453356742859 | loss/std: 0.07672457034081165 | lr: 0.02 | momentum: 0.9
* Epoch (3/10) 
train (4/10) loss: 0.8340168714523315 | loss/mean: 0.8340168714523315 | loss/std: 0.058609879995282096 | lr: 0.02 | momentum: 0.9
valid (4/10) loss: 0.8036665558815003 | loss/mean: 0.8036665558815003 | loss/std: 0.07839925830679348 | lr: 0.02 | momentum: 0.9
* Epoch (4/10) 
train (5/10) loss: 0.8349691867828368 | loss/mean: 0.8349691867828368 | loss/std: 0.059491842671784874 | lr: 0.02 | momentum: 0.9
valid (5/10) loss: 0.804456603527069 | loss/mean: 0.804456603527069 | loss/std: 0.07448748325550335 | lr: 0.02 | momentum: 0.9
* Epoch (5/10) 
train (6/10) loss: 0.8335818290710449 | loss/mean: 0.8335818290710449 | loss/std: 0.06185295471484458 | lr: 0.02 | momentum: 0.9
valid (6/10) loss: 0.8042678356170654 | loss/mean

valid (2/10) loss: 0.8185167074203491 | loss/mean: 0.8185167074203491 | loss/std: 0.0854855576925571 | lr: 0.02 | momentum: 0.9
* Epoch (2/10) 
train (3/10) loss: 0.8352946102619172 | loss/mean: 0.8352946102619172 | loss/std: 0.06578662090051053 | lr: 0.02 | momentum: 0.9
valid (3/10) loss: 0.8046453356742859 | loss/mean: 0.8046453356742859 | loss/std: 0.07672457034081165 | lr: 0.02 | momentum: 0.9
* Epoch (3/10) 
train (4/10) loss: 0.8340168714523315 | loss/mean: 0.8340168714523315 | loss/std: 0.058609879995282096 | lr: 0.02 | momentum: 0.9
valid (4/10) loss: 0.8036665558815003 | loss/mean: 0.8036665558815003 | loss/std: 0.07839925830679348 | lr: 0.02 | momentum: 0.9
* Epoch (4/10) 
train (5/10) loss: 0.8349691867828368 | loss/mean: 0.8349691867828368 | loss/std: 0.059491842671784874 | lr: 0.02 | momentum: 0.9
valid (5/10) loss: 0.804456603527069 | loss/mean: 0.804456603527069 | loss/std: 0.07448748325550335 | lr: 0.02 | momentum: 0.9
* Epoch (5/10) 
train (6/10) loss: 0.8335818290710

train (2/10) loss: 0.7269232094287872 | loss/mean: 0.7269232094287872 | loss/std: 0.04747241660593958 | lr: 0.02 | momentum: 0.9
valid (2/10) loss: 0.6329209804534912 | loss/mean: 0.6329209804534912 | loss/std: 0.04668347844466579 | lr: 0.02 | momentum: 0.9
* Epoch (2/10) 
train (3/10) loss: 0.6309882700443268 | loss/mean: 0.6309882700443268 | loss/std: 0.045884008072746306 | lr: 0.02 | momentum: 0.9
valid (3/10) loss: 0.6368893146514892 | loss/mean: 0.6368893146514892 | loss/std: 0.08050025918118241 | lr: 0.02 | momentum: 0.9
* Epoch (3/10) 
train (4/10) loss: 0.6237064957618713 | loss/mean: 0.6237064957618713 | loss/std: 0.06018737691869 | lr: 0.02 | momentum: 0.9
valid (4/10) loss: 0.6469945430755615 | loss/mean: 0.6469945430755615 | loss/std: 0.08818583478053123 | lr: 0.02 | momentum: 0.9
* Epoch (4/10) 
train (5/10) loss: 0.6437829196453095 | loss/mean: 0.6437829196453095 | loss/std: 0.06179421464216722 | lr: 0.02 | momentum: 0.9
valid (5/10) loss: 0.5825212836265564 | loss/mean: 

valid (1/10) loss: 0.8853433012962342 | loss/mean: 0.8853433012962342 | loss/std: 0.09834605239066169 | lr: 0.02 | momentum: 0.9
* Epoch (1/10) 
train (2/10) loss: 0.8822106957435608 | loss/mean: 0.8822106957435608 | loss/std: 0.06401227122699092 | lr: 0.02 | momentum: 0.9
valid (2/10) loss: 0.8146720051765441 | loss/mean: 0.8146720051765441 | loss/std: 0.07717888895549406 | lr: 0.02 | momentum: 0.9
* Epoch (2/10) 
train (3/10) loss: 0.8364907503128052 | loss/mean: 0.8364907503128052 | loss/std: 0.06260297728616321 | lr: 0.02 | momentum: 0.9
valid (3/10) loss: 0.8035429954528809 | loss/mean: 0.8035429954528809 | loss/std: 0.0759335748070917 | lr: 0.02 | momentum: 0.9
* Epoch (3/10) 
train (4/10) loss: 0.8340963780879974 | loss/mean: 0.8340963780879974 | loss/std: 0.05958363753487288 | lr: 0.02 | momentum: 0.9
valid (4/10) loss: 0.803242540359497 | loss/mean: 0.803242540359497 | loss/std: 0.07671247599263807 | lr: 0.02 | momentum: 0.9
* Epoch (4/10) 
train (5/10) loss: 0.834958982467651

train (1/10) loss: 1.047183132171631 | loss/mean: 1.047183132171631 | loss/std: 0.03196789799927719 | lr: 0.02 | momentum: 0.9
valid (1/10) loss: 0.977720320224762 | loss/mean: 0.977720320224762 | loss/std: 0.035570142318212344 | lr: 0.02 | momentum: 0.9
* Epoch (1/10) 
train (2/10) loss: 0.9282584369182587 | loss/mean: 0.9282584369182587 | loss/std: 0.05682103847720446 | lr: 0.02 | momentum: 0.9
valid (2/10) loss: 0.842641818523407 | loss/mean: 0.842641818523407 | loss/std: 0.10101596244987054 | lr: 0.02 | momentum: 0.9
* Epoch (2/10) 
train (3/10) loss: 0.8313241481781006 | loss/mean: 0.8313241481781006 | loss/std: 0.0465196950490826 | lr: 0.02 | momentum: 0.9
valid (3/10) loss: 0.7663455247879029 | loss/mean: 0.7663455247879029 | loss/std: 0.07610571501526449 | lr: 0.02 | momentum: 0.9
* Epoch (3/10) 
train (4/10) loss: 0.7664325714111327 | loss/mean: 0.7664325714111327 | loss/std: 0.03774425422893816 | lr: 0.02 | momentum: 0.9
valid (4/10) loss: 0.7251340031623841 | loss/mean: 0.72

train (1/10) loss: 0.9363475143909453 | loss/mean: 0.9363475143909453 | loss/std: 0.11129629147956774 | lr: 0.02 | momentum: 0.9
valid (1/10) loss: 0.7557720065116882 | loss/mean: 0.7557720065116882 | loss/std: 0.10082214397337247 | lr: 0.02 | momentum: 0.9
* Epoch (1/10) 
train (2/10) loss: 0.7267082095146179 | loss/mean: 0.7267082095146179 | loss/std: 0.058265719001576115 | lr: 0.02 | momentum: 0.9
valid (2/10) loss: 0.6378569722175598 | loss/mean: 0.6378569722175598 | loss/std: 0.05901098803313273 | lr: 0.02 | momentum: 0.9
* Epoch (2/10) 
train (3/10) loss: 0.6354409337043763 | loss/mean: 0.6354409337043763 | loss/std: 0.055577855346179 | lr: 0.02 | momentum: 0.9
valid (3/10) loss: 0.6394083738327027 | loss/mean: 0.6394083738327027 | loss/std: 0.07929199182296581 | lr: 0.02 | momentum: 0.9
* Epoch (3/10) 
train (4/10) loss: 0.6175975799560547 | loss/mean: 0.6175975799560547 | loss/std: 0.05259213390675625 | lr: 0.02 | momentum: 0.9
valid (4/10) loss: 0.6645863890647888 | loss/mean:

train (1/10) loss: 1.0345258474349976 | loss/mean: 1.0345258474349976 | loss/std: 0.07576699362380629 | lr: 0.02 | momentum: 0.9
valid (1/10) loss: 0.8091049313545227 | loss/mean: 0.8091049313545227 | loss/std: 0.08675634187362442 | lr: 0.02 | momentum: 0.9
* Epoch (1/10) 
train (2/10) loss: 0.7938700556755066 | loss/mean: 0.7938700556755066 | loss/std: 0.0743973959894513 | lr: 0.02 | momentum: 0.9
valid (2/10) loss: 0.7025946378707886 | loss/mean: 0.7025946378707886 | loss/std: 0.09469286225407814 | lr: 0.02 | momentum: 0.9
* Epoch (2/10) 
train (3/10) loss: 0.7442820489406586 | loss/mean: 0.7442820489406586 | loss/std: 0.12205493450956358 | lr: 0.02 | momentum: 0.9
valid (3/10) loss: 0.6604906320571899 | loss/mean: 0.6604906320571899 | loss/std: 0.0869473064671983 | lr: 0.02 | momentum: 0.9
* Epoch (3/10) 
train (4/10) loss: 0.7695237874984742 | loss/mean: 0.7695237874984742 | loss/std: 0.1365151877009983 | lr: 0.02 | momentum: 0.9
valid (4/10) loss: 0.7707737803459167 | loss/mean: 0

train (1/10) loss: 1.0190304219722746 | loss/mean: 1.0190304219722746 | loss/std: 0.08861032738191055 | lr: 0.02 | momentum: 0.9
valid (1/10) loss: 0.8082912683486938 | loss/mean: 0.8082912683486938 | loss/std: 0.15027457587042053 | lr: 0.02 | momentum: 0.9
* Epoch (1/10) 
train (2/10) loss: 0.8902971267700196 | loss/mean: 0.8902971267700196 | loss/std: 0.13593513397892382 | lr: 0.02 | momentum: 0.9
valid (2/10) loss: 0.83949773311615 | loss/mean: 0.83949773311615 | loss/std: 0.1923242347235882 | lr: 0.02 | momentum: 0.9
* Epoch (2/10) 
train (3/10) loss: 0.8807500004768372 | loss/mean: 0.8807500004768372 | loss/std: 0.0779745029953853 | lr: 0.02 | momentum: 0.9
valid (3/10) loss: 0.8298322200775147 | loss/mean: 0.8298322200775147 | loss/std: 0.09489070435049175 | lr: 0.02 | momentum: 0.9
* Epoch (3/10) 
train (4/10) loss: 0.8186959624290466 | loss/mean: 0.8186959624290466 | loss/std: 0.05836762242708595 | lr: 0.02 | momentum: 0.9
valid (4/10) loss: 0.774686598777771 | loss/mean: 0.774

train (1/10) loss: 1.0190304219722746 | loss/mean: 1.0190304219722746 | loss/std: 0.08861032738191055 | lr: 0.02 | momentum: 0.9
valid (1/10) loss: 0.8082912683486938 | loss/mean: 0.8082912683486938 | loss/std: 0.15027457587042053 | lr: 0.02 | momentum: 0.9
* Epoch (1/10) 
train (2/10) loss: 0.8902971267700196 | loss/mean: 0.8902971267700196 | loss/std: 0.13593513397892382 | lr: 0.02 | momentum: 0.9
valid (2/10) loss: 0.83949773311615 | loss/mean: 0.83949773311615 | loss/std: 0.1923242347235882 | lr: 0.02 | momentum: 0.9
* Epoch (2/10) 
train (3/10) loss: 0.8807500004768372 | loss/mean: 0.8807500004768372 | loss/std: 0.0779745029953853 | lr: 0.02 | momentum: 0.9
valid (3/10) loss: 0.8298322200775147 | loss/mean: 0.8298322200775147 | loss/std: 0.09489070435049175 | lr: 0.02 | momentum: 0.9
* Epoch (3/10) 
train (4/10) loss: 0.8186959624290466 | loss/mean: 0.8186959624290466 | loss/std: 0.05836762242708595 | lr: 0.02 | momentum: 0.9
valid (4/10) loss: 0.774686598777771 | loss/mean: 0.774

train (1/10) loss: 1.0163361310958863 | loss/mean: 1.0163361310958863 | loss/std: 0.09244736554977662 | lr: 0.02 | momentum: 0.9
valid (1/10) loss: 0.8191330313682557 | loss/mean: 0.8191330313682557 | loss/std: 0.17135748437518733 | lr: 0.02 | momentum: 0.9
* Epoch (1/10) 
train (2/10) loss: 0.7963147222995757 | loss/mean: 0.7963147222995757 | loss/std: 0.09979393679371716 | lr: 0.02 | momentum: 0.9
valid (2/10) loss: 0.663582193851471 | loss/mean: 0.663582193851471 | loss/std: 0.044557808378450164 | lr: 0.02 | momentum: 0.9
* Epoch (2/10) 
train (3/10) loss: 0.7620907723903655 | loss/mean: 0.7620907723903655 | loss/std: 0.137481486438186 | lr: 0.02 | momentum: 0.9
valid (3/10) loss: 0.8098227381706238 | loss/mean: 0.8098227381706238 | loss/std: 0.1602916758257071 | lr: 0.02 | momentum: 0.9
* Epoch (3/10) 
train (4/10) loss: 0.7699208736419678 | loss/mean: 0.7699208736419678 | loss/std: 0.08932819645282947 | lr: 0.02 | momentum: 0.9
valid (4/10) loss: 0.7860375642776489 | loss/mean: 0.

train (1/10) loss: 1.0250050246715545 | loss/mean: 1.0250050246715545 | loss/std: 0.09343451501683421 | lr: 0.02 | momentum: 0.9
valid (1/10) loss: 0.7784855008125305 | loss/mean: 0.7784855008125305 | loss/std: 0.13765026555135493 | lr: 0.02 | momentum: 0.9
* Epoch (1/10) 
train (2/10) loss: 0.79947629570961 | loss/mean: 0.79947629570961 | loss/std: 0.06842429881561478 | lr: 0.02 | momentum: 0.9
valid (2/10) loss: 0.6326297044754028 | loss/mean: 0.6326297044754028 | loss/std: 0.05574441058266989 | lr: 0.02 | momentum: 0.9
* Epoch (2/10) 
train (3/10) loss: 0.6960597634315491 | loss/mean: 0.6960597634315491 | loss/std: 0.13961987514088545 | lr: 0.02 | momentum: 0.9
valid (3/10) loss: 1.0514347434043885 | loss/mean: 1.0514347434043885 | loss/std: 0.1894463026541697 | lr: 0.02 | momentum: 0.9
* Epoch (3/10) 
train (4/10) loss: 1.0218498289585114 | loss/mean: 1.0218498289585114 | loss/std: 0.17795256215459668 | lr: 0.02 | momentum: 0.9
valid (4/10) loss: 0.9646253228187561 | loss/mean: 0.9

* Epoch (10/10) 
Top best models:
logs/trial_3_48/train.2.pth	0.6326
train (1/10) loss: 1.0250050246715545 | loss/mean: 1.0250050246715545 | loss/std: 0.09343451501683421 | lr: 0.02 | momentum: 0.9
valid (1/10) loss: 0.7784855008125305 | loss/mean: 0.7784855008125305 | loss/std: 0.13765026555135493 | lr: 0.02 | momentum: 0.9
* Epoch (1/10) 
train (2/10) loss: 0.79947629570961 | loss/mean: 0.79947629570961 | loss/std: 0.06842429881561478 | lr: 0.02 | momentum: 0.9
valid (2/10) loss: 0.6326297044754028 | loss/mean: 0.6326297044754028 | loss/std: 0.05574441058266989 | lr: 0.02 | momentum: 0.9
* Epoch (2/10) 
train (3/10) loss: 0.6960597634315491 | loss/mean: 0.6960597634315491 | loss/std: 0.13961987514088545 | lr: 0.02 | momentum: 0.9
valid (3/10) loss: 1.0514347434043885 | loss/mean: 1.0514347434043885 | loss/std: 0.1894463026541697 | lr: 0.02 | momentum: 0.9
* Epoch (3/10) 
train (4/10) loss: 1.0218498289585114 | loss/mean: 1.0218498289585114 | loss/std: 0.17795256215459668 | lr: 0.02 |

valid (10/10) loss: 0.6142660498619079 | loss/mean: 0.6142660498619079 | loss/std: 0.03255239538138933 | lr: 0.02 | momentum: 0.9
* Epoch (10/10) 
Top best models:
logs/trial_3_51/train.10.pth	0.6143
train (1/10) loss: 1.052332055568695 | loss/mean: 1.052332055568695 | loss/std: 0.062106258404320144 | lr: 0.02 | momentum: 0.9
valid (1/10) loss: 0.9912778496742248 | loss/mean: 0.9912778496742248 | loss/std: 0.09814996004695435 | lr: 0.02 | momentum: 0.9
* Epoch (1/10) 
train (2/10) loss: 0.9381893754005433 | loss/mean: 0.9381893754005433 | loss/std: 0.07053563387333206 | lr: 0.02 | momentum: 0.9
valid (2/10) loss: 0.8204873085021973 | loss/mean: 0.8204873085021973 | loss/std: 0.12094588804232788 | lr: 0.02 | momentum: 0.9
* Epoch (2/10) 
train (3/10) loss: 0.8111648321151733 | loss/mean: 0.8111648321151733 | loss/std: 0.05209903651987601 | lr: 0.02 | momentum: 0.9
valid (3/10) loss: 0.7385462760925293 | loss/mean: 0.7385462760925293 | loss/std: 0.0905935406575586 | lr: 0.02 | momentum: 

train (10/10) loss: 0.6190915763378143 | loss/mean: 0.6190915763378143 | loss/std: 0.0664610663919963 | lr: 0.02 | momentum: 0.9
valid (10/10) loss: 0.5855647444725036 | loss/mean: 0.5855647444725036 | loss/std: 0.035976597023668906 | lr: 0.02 | momentum: 0.9
* Epoch (10/10) 
Top best models:
logs/trial_3_54/train.6.pth	0.5807
train (1/10) loss: 0.9762920737266538 | loss/mean: 0.9762920737266538 | loss/std: 0.10040810237327201 | lr: 0.02 | momentum: 0.9
valid (1/10) loss: 0.7717388987541198 | loss/mean: 0.7717388987541198 | loss/std: 0.10663492216917365 | lr: 0.02 | momentum: 0.9
* Epoch (1/10) 
train (2/10) loss: 0.7377185523509979 | loss/mean: 0.7377185523509979 | loss/std: 0.051356545149509984 | lr: 0.02 | momentum: 0.9
valid (2/10) loss: 0.6613162636756897 | loss/mean: 0.6613162636756897 | loss/std: 0.05989420750086711 | lr: 0.02 | momentum: 0.9
* Epoch (2/10) 
train (3/10) loss: 0.6401635229587554 | loss/mean: 0.6401635229587554 | loss/std: 0.04496082530576038 | lr: 0.02 | momentu

valid (9/10) loss: 0.7718203067779541 | loss/mean: 0.7718203067779541 | loss/std: 0.13429551892425814 | lr: 0.02 | momentum: 0.9
* Epoch (9/10) 
train (10/10) loss: 0.7735246002674103 | loss/mean: 0.7735246002674103 | loss/std: 0.18503796657170593 | lr: 0.02 | momentum: 0.9
valid (10/10) loss: 0.8712966203689575 | loss/mean: 0.8712966203689575 | loss/std: 0.17366721061436152 | lr: 0.02 | momentum: 0.9
* Epoch (10/10) 
Top best models:
logs/trial_3_57/train.7.pth	0.6628
train (1/10) loss: 1.109537172317505 | loss/mean: 1.109537172317505 | loss/std: 0.026289442393874027 | lr: 0.02 | momentum: 0.9
valid (1/10) loss: 1.0989206314086915 | loss/mean: 1.0989206314086915 | loss/std: 0.024835902449109864 | lr: 0.02 | momentum: 0.9
* Epoch (1/10) 
train (2/10) loss: 1.0931928753852844 | loss/mean: 1.0931928753852844 | loss/std: 0.013608194645473163 | lr: 0.02 | momentum: 0.9
valid (2/10) loss: 1.0789403915405273 | loss/mean: 1.0789403915405273 | loss/std: 0.025605255584464796 | lr: 0.02 | moment

train (9/10) loss: 0.857121467590332 | loss/mean: 0.857121467590332 | loss/std: 0.07741295301460316 | lr: 0.02 | momentum: 0.9
valid (9/10) loss: 0.7718203067779541 | loss/mean: 0.7718203067779541 | loss/std: 0.13429551892425814 | lr: 0.02 | momentum: 0.9
* Epoch (9/10) 
train (10/10) loss: 0.7735246002674103 | loss/mean: 0.7735246002674103 | loss/std: 0.18503796657170593 | lr: 0.02 | momentum: 0.9
valid (10/10) loss: 0.8712966203689575 | loss/mean: 0.8712966203689575 | loss/std: 0.17366721061436152 | lr: 0.02 | momentum: 0.9
* Epoch (10/10) 
Top best models:
logs/trial_3_60/train.7.pth	0.6628
train (1/10) loss: 1.109537172317505 | loss/mean: 1.109537172317505 | loss/std: 0.026289442393874027 | lr: 0.02 | momentum: 0.9
valid (1/10) loss: 1.0989206314086915 | loss/mean: 1.0989206314086915 | loss/std: 0.024835902449109864 | lr: 0.02 | momentum: 0.9
* Epoch (1/10) 
train (2/10) loss: 1.0931928753852844 | loss/mean: 1.0931928753852844 | loss/std: 0.013608194645473163 | lr: 0.02 | momentum:

valid (8/10) loss: 0.8514448642730713 | loss/mean: 0.8514448642730713 | loss/std: 0.20203054503631412 | lr: 0.02 | momentum: 0.9
* Epoch (8/10) 
train (9/10) loss: 0.9014448702335357 | loss/mean: 0.9014448702335357 | loss/std: 0.1029857517395451 | lr: 0.02 | momentum: 0.9
valid (9/10) loss: 0.8514448642730713 | loss/mean: 0.8514448642730713 | loss/std: 0.20203054503631412 | lr: 0.02 | momentum: 0.9
* Epoch (9/10) 
train (10/10) loss: 0.9014448702335357 | loss/mean: 0.9014448702335357 | loss/std: 0.1029857517395451 | lr: 0.02 | momentum: 0.9
valid (10/10) loss: 0.8514448642730713 | loss/mean: 0.8514448642730713 | loss/std: 0.20203054503631412 | lr: 0.02 | momentum: 0.9
* Epoch (10/10) 
Top best models:
logs/trial_3_63/train.1.pth	0.8017
train (1/10) loss: 1.0482073247432706 | loss/mean: 1.0482073247432706 | loss/std: 0.06162333029385587 | lr: 0.02 | momentum: 0.9
valid (1/10) loss: 0.8017066955566406 | loss/mean: 0.8017066955566406 | loss/std: 0.09677581702367065 | lr: 0.02 | momentum: 

train (8/10) loss: 0.9014448702335357 | loss/mean: 0.9014448702335357 | loss/std: 0.1029857517395451 | lr: 0.02 | momentum: 0.9
valid (8/10) loss: 0.8514448642730713 | loss/mean: 0.8514448642730713 | loss/std: 0.20203054503631412 | lr: 0.02 | momentum: 0.9
* Epoch (8/10) 
train (9/10) loss: 0.9014448702335357 | loss/mean: 0.9014448702335357 | loss/std: 0.1029857517395451 | lr: 0.02 | momentum: 0.9
valid (9/10) loss: 0.8514448642730713 | loss/mean: 0.8514448642730713 | loss/std: 0.20203054503631412 | lr: 0.02 | momentum: 0.9
* Epoch (9/10) 
train (10/10) loss: 0.9014448702335357 | loss/mean: 0.9014448702335357 | loss/std: 0.1029857517395451 | lr: 0.02 | momentum: 0.9
valid (10/10) loss: 0.8514448642730713 | loss/mean: 0.8514448642730713 | loss/std: 0.20203054503631412 | lr: 0.02 | momentum: 0.9
* Epoch (10/10) 
Top best models:
logs/trial_3_66/train.1.pth	0.8017
train (1/10) loss: 0.9469355881214142 | loss/mean: 0.9469355881214142 | loss/std: 0.10832442970739534 | lr: 0.02 | momentum: 0

valid (7/10) loss: 0.597417664527893 | loss/mean: 0.597417664527893 | loss/std: 0.05028039820665941 | lr: 0.02 | momentum: 0.9
* Epoch (7/10) 
train (8/10) loss: 0.5846370995044708 | loss/mean: 0.5846370995044708 | loss/std: 0.03536568341397501 | lr: 0.02 | momentum: 0.9
valid (8/10) loss: 0.5770642399787903 | loss/mean: 0.5770642399787903 | loss/std: 0.04831495433641338 | lr: 0.02 | momentum: 0.9
* Epoch (8/10) 
train (9/10) loss: 0.61834876537323 | loss/mean: 0.61834876537323 | loss/std: 0.07706530608228067 | lr: 0.02 | momentum: 0.9
valid (9/10) loss: 0.5811784863471985 | loss/mean: 0.5811784863471985 | loss/std: 0.0345573423143645 | lr: 0.02 | momentum: 0.9
* Epoch (9/10) 
train (10/10) loss: 0.6309664964675903 | loss/mean: 0.6309664964675903 | loss/std: 0.07366937774855571 | lr: 0.02 | momentum: 0.9
valid (10/10) loss: 0.5851340174674988 | loss/mean: 0.5851340174674988 | loss/std: 0.039285486128454776 | lr: 0.02 | momentum: 0.9
* Epoch (10/10) 
Top best models:
logs/trial_3_69/tra

train (7/10) loss: 0.713100129365921 | loss/mean: 0.713100129365921 | loss/std: 0.10406291753275412 | lr: 0.02 | momentum: 0.9
valid (7/10) loss: 0.6047044157981872 | loss/mean: 0.6047044157981872 | loss/std: 0.06841217196195123 | lr: 0.02 | momentum: 0.9
* Epoch (7/10) 
train (8/10) loss: 0.6017794966697693 | loss/mean: 0.6017794966697693 | loss/std: 0.05558507249203619 | lr: 0.02 | momentum: 0.9
valid (8/10) loss: 0.5770202040672302 | loss/mean: 0.5770202040672302 | loss/std: 0.0327184780280418 | lr: 0.02 | momentum: 0.9
* Epoch (8/10) 
train (9/10) loss: 0.6540681838989257 | loss/mean: 0.6540681838989257 | loss/std: 0.08187652510806193 | lr: 0.02 | momentum: 0.9
valid (9/10) loss: 0.6032251834869384 | loss/mean: 0.6032251834869384 | loss/std: 0.05439371606653391 | lr: 0.02 | momentum: 0.9
* Epoch (9/10) 
train (10/10) loss: 0.6285529017448425 | loss/mean: 0.6285529017448425 | loss/std: 0.09343577942437056 | lr: 0.02 | momentum: 0.9
valid (10/10) loss: 0.5954004168510437 | loss/mean:

valid (6/10) loss: 0.6256983876228333 | loss/mean: 0.6256983876228333 | loss/std: 0.07234177566544864 | lr: 0.02 | momentum: 0.9
* Epoch (6/10) 
train (7/10) loss: 0.6136595010757446 | loss/mean: 0.6136595010757446 | loss/std: 0.05479209824186818 | lr: 0.02 | momentum: 0.9
valid (7/10) loss: 0.5825459241867066 | loss/mean: 0.5825459241867066 | loss/std: 0.03433639234759543 | lr: 0.02 | momentum: 0.9
* Epoch (7/10) 
train (8/10) loss: 0.6052732288837434 | loss/mean: 0.6052732288837434 | loss/std: 0.046230681924493464 | lr: 0.02 | momentum: 0.9
valid (8/10) loss: 0.5814140677452088 | loss/mean: 0.5814140677452088 | loss/std: 0.040233691243004466 | lr: 0.02 | momentum: 0.9
* Epoch (8/10) 
train (9/10) loss: 0.651656413078308 | loss/mean: 0.651656413078308 | loss/std: 0.08165107939966527 | lr: 0.02 | momentum: 0.9
valid (9/10) loss: 0.5992029309272766 | loss/mean: 0.5992029309272766 | loss/std: 0.04676234229475487 | lr: 0.02 | momentum: 0.9
* Epoch (9/10) 
train (10/10) loss: 0.59952515363

train (6/10) loss: 0.6069092571735382 | loss/mean: 0.6069092571735382 | loss/std: 0.08052724813537918 | lr: 0.02 | momentum: 0.9
valid (6/10) loss: 0.6094304084777832 | loss/mean: 0.6094304084777832 | loss/std: 0.0767581224984558 | lr: 0.02 | momentum: 0.9
* Epoch (6/10) 
train (7/10) loss: 0.6386058270931243 | loss/mean: 0.6386058270931243 | loss/std: 0.06875850975296702 | lr: 0.02 | momentum: 0.9
valid (7/10) loss: 0.6121927380561829 | loss/mean: 0.6121927380561829 | loss/std: 0.07613745447703932 | lr: 0.02 | momentum: 0.9
* Epoch (7/10) 
train (8/10) loss: 0.694084882736206 | loss/mean: 0.694084882736206 | loss/std: 0.08770552896685983 | lr: 0.02 | momentum: 0.9
valid (8/10) loss: 0.6508390784263611 | loss/mean: 0.6508390784263611 | loss/std: 0.08653174605643228 | lr: 0.02 | momentum: 0.9
* Epoch (8/10) 
train (9/10) loss: 0.6418895602226257 | loss/mean: 0.6418895602226257 | loss/std: 0.08628466445392716 | lr: 0.02 | momentum: 0.9
valid (9/10) loss: 0.5859748363494873 | loss/mean: 0

train (16/30) loss: 1.2414449214935306 | loss/mean: 1.2414449214935306 | loss/std: 0.1452966486891931 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 1.1714448928833008 | loss/mean: 1.1714448928833008 | loss/std: 0.18516406672882949 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 1.2414449214935306 | loss/mean: 1.2414449214935306 | loss/std: 0.1452966486891931 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 1.1714448928833008 | loss/mean: 1.1714448928833008 | loss/std: 0.18516406672882949 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 1.2414449214935306 | loss/mean: 1.2414449214935306 | loss/std: 0.1452966486891931 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 1.1714448928833008 | loss/mean: 1.1714448928833008 | loss/std: 0.18516406672882949 | lr: 0.02 | momentum: 0.9
* Epoch (18/30) 
train (19/30) loss: 1.2414449214935306 | loss/mean: 1.2414449214935306 | loss/std: 0.1452966486891931 | lr: 0.02 | momentum: 0.9
valid (19/30) loss: 1.1714448928833008 | lo

train (16/30) loss: 0.692053347826004 | loss/mean: 0.692053347826004 | loss/std: 0.05378749259615012 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.5719072222709656 | loss/mean: 0.5719072222709656 | loss/std: 0.04131144057653249 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.6221863627433777 | loss/mean: 0.6221863627433777 | loss/std: 0.09690149855835 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.7705624818801879 | loss/mean: 0.7705624818801879 | loss/std: 0.13419112292903115 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.699691379070282 | loss/mean: 0.699691379070282 | loss/std: 0.0864784769621961 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.6985453128814697 | loss/mean: 0.6985453128814697 | loss/std: 0.09282605024237899 | lr: 0.02 | momentum: 0.9
* Epoch (18/30) 
train (19/30) loss: 0.9090593159198762 | loss/mean: 0.9090593159198762 | loss/std: 0.20560027669059147 | lr: 0.02 | momentum: 0.9
valid (19/30) loss: 1.1240128040313722 | loss/m

train (16/30) loss: 0.692053347826004 | loss/mean: 0.692053347826004 | loss/std: 0.05378749259615012 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.5719072222709656 | loss/mean: 0.5719072222709656 | loss/std: 0.04131144057653249 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.6221863627433777 | loss/mean: 0.6221863627433777 | loss/std: 0.09690149855835 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.7705624818801879 | loss/mean: 0.7705624818801879 | loss/std: 0.13419112292903115 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.699691379070282 | loss/mean: 0.699691379070282 | loss/std: 0.0864784769621961 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.6985453128814697 | loss/mean: 0.6985453128814697 | loss/std: 0.09282605024237899 | lr: 0.02 | momentum: 0.9
* Epoch (18/30) 
train (19/30) loss: 0.9090593159198762 | loss/mean: 0.9090593159198762 | loss/std: 0.20560027669059147 | lr: 0.02 | momentum: 0.9
valid (19/30) loss: 1.1240128040313722 | loss/m

train (16/30) loss: 0.692053347826004 | loss/mean: 0.692053347826004 | loss/std: 0.05378749259615012 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.5719072222709656 | loss/mean: 0.5719072222709656 | loss/std: 0.04131144057653249 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.6221863627433777 | loss/mean: 0.6221863627433777 | loss/std: 0.09690149855835 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.7705624818801879 | loss/mean: 0.7705624818801879 | loss/std: 0.13419112292903115 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.699691379070282 | loss/mean: 0.699691379070282 | loss/std: 0.0864784769621961 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.6985453128814697 | loss/mean: 0.6985453128814697 | loss/std: 0.09282605024237899 | lr: 0.02 | momentum: 0.9
* Epoch (18/30) 
train (19/30) loss: 0.9090593159198762 | loss/mean: 0.9090593159198762 | loss/std: 0.20560027669059147 | lr: 0.02 | momentum: 0.9
valid (19/30) loss: 1.1240128040313722 | loss/m

train (16/30) loss: 0.6059948384761811 | loss/mean: 0.6059948384761811 | loss/std: 0.05977068428640991 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.5757596731185913 | loss/mean: 0.5757596731185913 | loss/std: 0.03754695243761325 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.7427617669105531 | loss/mean: 0.7427617669105531 | loss/std: 0.12062776078169755 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.6896754264831543 | loss/mean: 0.6896754264831543 | loss/std: 0.10921190273326094 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.612658578157425 | loss/mean: 0.612658578157425 | loss/std: 0.062312858777333634 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.5768185377120971 | loss/mean: 0.5768185377120971 | loss/std: 0.03506497382621186 | lr: 0.02 | momentum: 0.9
* Epoch (18/30) 
train (19/30) loss: 0.6398520767688751 | loss/mean: 0.6398520767688751 | loss/std: 0.08671303049314294 | lr: 0.02 | momentum: 0.9
valid (19/30) loss: 0.590618908405304 | 

train (16/30) loss: 0.6059948384761811 | loss/mean: 0.6059948384761811 | loss/std: 0.05977068428640991 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.5757596731185913 | loss/mean: 0.5757596731185913 | loss/std: 0.03754695243761325 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.7427617669105531 | loss/mean: 0.7427617669105531 | loss/std: 0.12062776078169755 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.6896754264831543 | loss/mean: 0.6896754264831543 | loss/std: 0.10921190273326094 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.612658578157425 | loss/mean: 0.612658578157425 | loss/std: 0.062312858777333634 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.5768185377120971 | loss/mean: 0.5768185377120971 | loss/std: 0.03506497382621186 | lr: 0.02 | momentum: 0.9
* Epoch (18/30) 
train (19/30) loss: 0.6398520767688751 | loss/mean: 0.6398520767688751 | loss/std: 0.08671303049314294 | lr: 0.02 | momentum: 0.9
valid (19/30) loss: 0.590618908405304 | 

train (16/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (18/30) 
train (19/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (19/30) loss: 0.871444833278656 | loss/mean: 0.

train (16/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (18/30) 
train (19/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (19/30) loss: 0.871444833278656 | loss/mean: 0.

train (16/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (18/30) 
train (19/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (19/30) loss: 0.871444833278656 | loss/mean: 0.

train (16/30) loss: 0.6005442500114441 | loss/mean: 0.6005442500114441 | loss/std: 0.043668325775412534 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.5859116911888123 | loss/mean: 0.5859116911888123 | loss/std: 0.026854462748888575 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.5985369384288788 | loss/mean: 0.5985369384288788 | loss/std: 0.03906689177158716 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.5847041845321655 | loss/mean: 0.5847041845321655 | loss/std: 0.026653491589932105 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.5992829680442812 | loss/mean: 0.5992829680442812 | loss/std: 0.046957327508983356 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.5851422190666199 | loss/mean: 0.5851422190666199 | loss/std: 0.026938386924568552 | lr: 0.02 | momentum: 0.9
* Epoch (18/30) 
train (19/30) loss: 0.5963632345199585 | loss/mean: 0.5963632345199585 | loss/std: 0.03463450293879202 | lr: 0.02 | momentum: 0.9
valid (19/30) loss: 0.583307421207

train (16/30) loss: 0.5773136854171753 | loss/mean: 0.5773136854171753 | loss/std: 0.045567620621462246 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.5747919082641602 | loss/mean: 0.5747919082641602 | loss/std: 0.044675330420056836 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.5994233965873719 | loss/mean: 0.5994233965873719 | loss/std: 0.06464607594538514 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.586068058013916 | loss/mean: 0.586068058013916 | loss/std: 0.03480734000283747 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.5726947784423829 | loss/mean: 0.5726947784423829 | loss/std: 0.034078535537129986 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.5737682938575744 | loss/mean: 0.5737682938575744 | loss/std: 0.03907148693037846 | lr: 0.02 | momentum: 0.9
* Epoch (18/30) 
train (19/30) loss: 0.5977519750595093 | loss/mean: 0.5977519750595093 | loss/std: 0.06499624651744061 | lr: 0.02 | momentum: 0.9
valid (19/30) loss: 0.583070182800293 

train (16/30) loss: 0.5773136854171753 | loss/mean: 0.5773136854171753 | loss/std: 0.045567620621462246 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.5747919082641602 | loss/mean: 0.5747919082641602 | loss/std: 0.044675330420056836 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.5994233965873719 | loss/mean: 0.5994233965873719 | loss/std: 0.06464607594538514 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.586068058013916 | loss/mean: 0.586068058013916 | loss/std: 0.03480734000283747 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.5726947784423829 | loss/mean: 0.5726947784423829 | loss/std: 0.034078535537129986 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.5737682938575744 | loss/mean: 0.5737682938575744 | loss/std: 0.03907148693037846 | lr: 0.02 | momentum: 0.9
* Epoch (18/30) 
train (19/30) loss: 0.5977519750595093 | loss/mean: 0.5977519750595093 | loss/std: 0.06499624651744061 | lr: 0.02 | momentum: 0.9
valid (19/30) loss: 0.583070182800293 

train (16/30) loss: 0.6750525176525116 | loss/mean: 0.6750525176525116 | loss/std: 0.1269832162546138 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.7310159564018249 | loss/mean: 0.7310159564018249 | loss/std: 0.117474563519289 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.6710397779941559 | loss/mean: 0.6710397779941559 | loss/std: 0.07568025672120846 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.5745063185691833 | loss/mean: 0.5745063185691833 | loss/std: 0.025342356460428775 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.7042577564716339 | loss/mean: 0.7042577564716339 | loss/std: 0.13304508983995808 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.5697672486305236 | loss/mean: 0.5697672486305236 | loss/std: 0.03687272111067537 | lr: 0.02 | momentum: 0.9
* Epoch (18/30) 
train (19/30) loss: 0.7566052377223969 | loss/mean: 0.7566052377223969 | loss/std: 0.16870447127410368 | lr: 0.02 | momentum: 0.9
valid (19/30) loss: 0.8514450311660766 | 

train (16/30) loss: 0.6750525176525116 | loss/mean: 0.6750525176525116 | loss/std: 0.1269832162546138 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.7310159564018249 | loss/mean: 0.7310159564018249 | loss/std: 0.117474563519289 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.6710397779941559 | loss/mean: 0.6710397779941559 | loss/std: 0.07568025672120846 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.5745063185691833 | loss/mean: 0.5745063185691833 | loss/std: 0.025342356460428775 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.7042577564716339 | loss/mean: 0.7042577564716339 | loss/std: 0.13304508983995808 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.5697672486305236 | loss/mean: 0.5697672486305236 | loss/std: 0.03687272111067537 | lr: 0.02 | momentum: 0.9
* Epoch (18/30) 
train (19/30) loss: 0.7566052377223969 | loss/mean: 0.7566052377223969 | loss/std: 0.16870447127410368 | lr: 0.02 | momentum: 0.9
valid (19/30) loss: 0.8514450311660766 | 

* Epoch (15/30) 
train (16/30) loss: 0.6750525176525116 | loss/mean: 0.6750525176525116 | loss/std: 0.1269832162546138 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.7310159564018249 | loss/mean: 0.7310159564018249 | loss/std: 0.117474563519289 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.6710397779941559 | loss/mean: 0.6710397779941559 | loss/std: 0.07568025672120846 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.5745063185691833 | loss/mean: 0.5745063185691833 | loss/std: 0.025342356460428775 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.7042577564716339 | loss/mean: 0.7042577564716339 | loss/std: 0.13304508983995808 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.5697672486305236 | loss/mean: 0.5697672486305236 | loss/std: 0.03687272111067537 | lr: 0.02 | momentum: 0.9
* Epoch (18/30) 
train (19/30) loss: 0.7566052377223969 | loss/mean: 0.7566052377223969 | loss/std: 0.16870447127410368 | lr: 0.02 | momentum: 0.9
valid (19/30) loss: 0.85

train (16/30) loss: 0.6750525176525116 | loss/mean: 0.6750525176525116 | loss/std: 0.1269832162546138 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.7310159564018249 | loss/mean: 0.7310159564018249 | loss/std: 0.117474563519289 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.6710397779941559 | loss/mean: 0.6710397779941559 | loss/std: 0.07568025672120846 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.5745063185691833 | loss/mean: 0.5745063185691833 | loss/std: 0.025342356460428775 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.7042577564716339 | loss/mean: 0.7042577564716339 | loss/std: 0.13304508983995808 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.5697672486305236 | loss/mean: 0.5697672486305236 | loss/std: 0.03687272111067537 | lr: 0.02 | momentum: 0.9
* Epoch (18/30) 
train (19/30) loss: 0.7566052377223969 | loss/mean: 0.7566052377223969 | loss/std: 0.16870447127410368 | lr: 0.02 | momentum: 0.9
valid (19/30) loss: 0.8514450311660766 | 

* Epoch (15/30) 
train (16/30) loss: 0.9014437913894654 | loss/mean: 0.9014437913894654 | loss/std: 0.10298507652034868 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.8514447808265686 | loss/mean: 0.8514447808265686 | loss/std: 0.20203049084751057 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.9014438569545746 | loss/mean: 0.9014438569545746 | loss/std: 0.10298509113477698 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.8514447808265686 | loss/mean: 0.8514447808265686 | loss/std: 0.20203049084751057 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.9014438927173615 | loss/mean: 0.9014438927173615 | loss/std: 0.10298512036491632 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.8514447808265686 | loss/mean: 0.8514447808265686 | loss/std: 0.20203049084751057 | lr: 0.02 | momentum: 0.9
* Epoch (18/30) 
train (19/30) loss: 0.9014438807964325 | loss/mean: 0.9014438807964325 | loss/std: 0.10298510282653069 | lr: 0.02 | momentum: 0.9
valid (19/30) loss: 0.

train (16/30) loss: 0.9014437913894654 | loss/mean: 0.9014437913894654 | loss/std: 0.10298507652034868 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.8514447808265686 | loss/mean: 0.8514447808265686 | loss/std: 0.20203049084751057 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.9014438569545746 | loss/mean: 0.9014438569545746 | loss/std: 0.10298509113477698 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.8514447808265686 | loss/mean: 0.8514447808265686 | loss/std: 0.20203049084751057 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.9014438927173615 | loss/mean: 0.9014438927173615 | loss/std: 0.10298512036491632 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.8514447808265686 | loss/mean: 0.8514447808265686 | loss/std: 0.20203049084751057 | lr: 0.02 | momentum: 0.9
* Epoch (18/30) 
train (19/30) loss: 0.9014438807964325 | loss/mean: 0.9014438807964325 | loss/std: 0.10298510282653069 | lr: 0.02 | momentum: 0.9
valid (19/30) loss: 0.8514447808265686 

train (16/30) loss: 0.9014437913894654 | loss/mean: 0.9014437913894654 | loss/std: 0.10298507652034868 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.8514447808265686 | loss/mean: 0.8514447808265686 | loss/std: 0.20203049084751057 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.9014438569545746 | loss/mean: 0.9014438569545746 | loss/std: 0.10298509113477698 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.8514447808265686 | loss/mean: 0.8514447808265686 | loss/std: 0.20203049084751057 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.9014438927173615 | loss/mean: 0.9014438927173615 | loss/std: 0.10298512036491632 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.8514447808265686 | loss/mean: 0.8514447808265686 | loss/std: 0.20203049084751057 | lr: 0.02 | momentum: 0.9
* Epoch (18/30) 
train (19/30) loss: 0.9014438807964325 | loss/mean: 0.9014438807964325 | loss/std: 0.10298510282653069 | lr: 0.02 | momentum: 0.9
valid (19/30) loss: 0.8514447808265686 

train (16/30) loss: 0.9014437913894654 | loss/mean: 0.9014437913894654 | loss/std: 0.10298507652034868 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.8514447808265686 | loss/mean: 0.8514447808265686 | loss/std: 0.20203049084751057 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.9014438569545746 | loss/mean: 0.9014438569545746 | loss/std: 0.10298509113477698 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.8514447808265686 | loss/mean: 0.8514447808265686 | loss/std: 0.20203049084751057 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.9014438927173615 | loss/mean: 0.9014438927173615 | loss/std: 0.10298512036491632 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.8514447808265686 | loss/mean: 0.8514447808265686 | loss/std: 0.20203049084751057 | lr: 0.02 | momentum: 0.9
* Epoch (18/30) 
train (19/30) loss: 0.9014438807964325 | loss/mean: 0.9014438807964325 | loss/std: 0.10298510282653069 | lr: 0.02 | momentum: 0.9
valid (19/30) loss: 0.8514447808265686 

* Epoch (15/30) 
train (16/30) loss: 0.9014437913894654 | loss/mean: 0.9014437913894654 | loss/std: 0.10298507652034868 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.8514447808265686 | loss/mean: 0.8514447808265686 | loss/std: 0.20203049084751057 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.9014438569545746 | loss/mean: 0.9014438569545746 | loss/std: 0.10298509113477698 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.8514447808265686 | loss/mean: 0.8514447808265686 | loss/std: 0.20203049084751057 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.9014438927173615 | loss/mean: 0.9014438927173615 | loss/std: 0.10298512036491632 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.8514447808265686 | loss/mean: 0.8514447808265686 | loss/std: 0.20203049084751057 | lr: 0.02 | momentum: 0.9
* Epoch (18/30) 
train (19/30) loss: 0.9014438807964325 | loss/mean: 0.9014438807964325 | loss/std: 0.10298510282653069 | lr: 0.02 | momentum: 0.9
valid (19/30) loss: 0.

train (16/30) loss: 0.6080127716064453 | loss/mean: 0.6080127716064453 | loss/std: 0.06030286618547923 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.584760308265686 | loss/mean: 0.584760308265686 | loss/std: 0.034899513227522806 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.6396753549575807 | loss/mean: 0.6396753549575807 | loss/std: 0.09919445081971044 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.5936113476753235 | loss/mean: 0.5936113476753235 | loss/std: 0.0416878459550902 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.6322998940944672 | loss/mean: 0.6322998940944672 | loss/std: 0.09756127049860958 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.5739701151847839 | loss/mean: 0.5739701151847839 | loss/std: 0.04241272935131342 | lr: 0.02 | momentum: 0.9
* Epoch (18/30) 
train (19/30) loss: 0.6461474835872651 | loss/mean: 0.6461474835872651 | loss/std: 0.08967448006578577 | lr: 0.02 | momentum: 0.9
valid (19/30) loss: 0.5855655312538147 | 

train (16/30) loss: 0.6080127716064453 | loss/mean: 0.6080127716064453 | loss/std: 0.06030286618547923 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.584760308265686 | loss/mean: 0.584760308265686 | loss/std: 0.034899513227522806 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.6396753549575807 | loss/mean: 0.6396753549575807 | loss/std: 0.09919445081971044 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.5936113476753235 | loss/mean: 0.5936113476753235 | loss/std: 0.0416878459550902 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.6322998940944672 | loss/mean: 0.6322998940944672 | loss/std: 0.09756127049860958 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.5739701151847839 | loss/mean: 0.5739701151847839 | loss/std: 0.04241272935131342 | lr: 0.02 | momentum: 0.9
* Epoch (18/30) 
train (19/30) loss: 0.6461474835872651 | loss/mean: 0.6461474835872651 | loss/std: 0.08967448006578577 | lr: 0.02 | momentum: 0.9
valid (19/30) loss: 0.5855655312538147 | 

train (16/30) loss: 0.6900916814804079 | loss/mean: 0.6900916814804079 | loss/std: 0.12605645748256655 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.6529498815536499 | loss/mean: 0.6529498815536499 | loss/std: 0.11036545697261745 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.781372344493866 | loss/mean: 0.781372344493866 | loss/std: 0.12737998278723134 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.8313171863555908 | loss/mean: 0.8313171863555908 | loss/std: 0.1956243532068998 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.8512651324272157 | loss/mean: 0.8512651324272157 | loss/std: 0.07775153971148328 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.8512848854064942 | loss/mean: 0.8512848854064942 | loss/std: 0.20177863147043792 | lr: 0.02 | momentum: 0.9
* Epoch (18/30) 
train (19/30) loss: 0.8614419996738434 | loss/mean: 0.8614419996738434 | loss/std: 0.08352645690131351 | lr: 0.02 | momentum: 0.9
valid (19/30) loss: 0.8512600779533386 | l

train (16/30) loss: 0.6900916814804079 | loss/mean: 0.6900916814804079 | loss/std: 0.12605645748256655 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.6529498815536499 | loss/mean: 0.6529498815536499 | loss/std: 0.11036545697261745 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.781372344493866 | loss/mean: 0.781372344493866 | loss/std: 0.12737998278723134 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.8313171863555908 | loss/mean: 0.8313171863555908 | loss/std: 0.1956243532068998 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.8512651324272157 | loss/mean: 0.8512651324272157 | loss/std: 0.07775153971148328 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.8512848854064942 | loss/mean: 0.8512848854064942 | loss/std: 0.20177863147043792 | lr: 0.02 | momentum: 0.9
* Epoch (18/30) 
train (19/30) loss: 0.8614419996738434 | loss/mean: 0.8614419996738434 | loss/std: 0.08352645690131351 | lr: 0.02 | momentum: 0.9
valid (19/30) loss: 0.8512600779533386 | l

* Epoch (15/30) 
train (16/30) loss: 0.6900916814804079 | loss/mean: 0.6900916814804079 | loss/std: 0.12605645748256655 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.6529498815536499 | loss/mean: 0.6529498815536499 | loss/std: 0.11036545697261745 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.781372344493866 | loss/mean: 0.781372344493866 | loss/std: 0.12737998278723134 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.8313171863555908 | loss/mean: 0.8313171863555908 | loss/std: 0.1956243532068998 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.8512651324272157 | loss/mean: 0.8512651324272157 | loss/std: 0.07775153971148328 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.8512848854064942 | loss/mean: 0.8512848854064942 | loss/std: 0.20177863147043792 | lr: 0.02 | momentum: 0.9
* Epoch (18/30) 
train (19/30) loss: 0.8614419996738434 | loss/mean: 0.8614419996738434 | loss/std: 0.08352645690131351 | lr: 0.02 | momentum: 0.9
valid (19/30) loss: 0.851

* Epoch (15/30) 
train (16/30) loss: 0.6900916814804079 | loss/mean: 0.6900916814804079 | loss/std: 0.12605645748256655 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.6529498815536499 | loss/mean: 0.6529498815536499 | loss/std: 0.11036545697261745 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.781372344493866 | loss/mean: 0.781372344493866 | loss/std: 0.12737998278723134 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.8313171863555908 | loss/mean: 0.8313171863555908 | loss/std: 0.1956243532068998 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.8512651324272157 | loss/mean: 0.8512651324272157 | loss/std: 0.07775153971148328 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.8512848854064942 | loss/mean: 0.8512848854064942 | loss/std: 0.20177863147043792 | lr: 0.02 | momentum: 0.9
* Epoch (18/30) 
train (19/30) loss: 0.8614419996738434 | loss/mean: 0.8614419996738434 | loss/std: 0.08352645690131351 | lr: 0.02 | momentum: 0.9
valid (19/30) loss: 0.851

* Epoch (15/30) 
train (16/30) loss: 0.6900916814804079 | loss/mean: 0.6900916814804079 | loss/std: 0.12605645748256655 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.6529498815536499 | loss/mean: 0.6529498815536499 | loss/std: 0.11036545697261745 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.781372344493866 | loss/mean: 0.781372344493866 | loss/std: 0.12737998278723134 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.8313171863555908 | loss/mean: 0.8313171863555908 | loss/std: 0.1956243532068998 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.8512651324272157 | loss/mean: 0.8512651324272157 | loss/std: 0.07775153971148328 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.8512848854064942 | loss/mean: 0.8512848854064942 | loss/std: 0.20177863147043792 | lr: 0.02 | momentum: 0.9
* Epoch (18/30) 
train (19/30) loss: 0.8614419996738434 | loss/mean: 0.8614419996738434 | loss/std: 0.08352645690131351 | lr: 0.02 | momentum: 0.9
valid (19/30) loss: 0.851

train (16/30) loss: 0.6136232852935791 | loss/mean: 0.6136232852935791 | loss/std: 0.03578285479021514 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.6009544253349304 | loss/mean: 0.6009544253349304 | loss/std: 0.03230156053233382 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.6109994590282439 | loss/mean: 0.6109994590282439 | loss/std: 0.03563935482770661 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.5989360570907593 | loss/mean: 0.5989360570907593 | loss/std: 0.03185766170889331 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.6087456941604614 | loss/mean: 0.6087456941604614 | loss/std: 0.035548046909553106 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.5972478866577149 | loss/mean: 0.5972478866577149 | loss/std: 0.03149463529534849 | lr: 0.02 | momentum: 0.9
* Epoch (18/30) 
train (19/30) loss: 0.6067927360534667 | loss/mean: 0.6067927360534667 | loss/std: 0.03550323983096115 | lr: 0.02 | momentum: 0.9
valid (19/30) loss: 0.5958252787590027

valid (15/30) loss: 0.5890609145164489 | loss/mean: 0.5890609145164489 | loss/std: 0.02885433233331676 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.600214171409607 | loss/mean: 0.600214171409607 | loss/std: 0.041234241954704084 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.588761854171753 | loss/mean: 0.588761854171753 | loss/std: 0.02877660236801682 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.598428738117218 | loss/mean: 0.598428738117218 | loss/std: 0.03752779868684006 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.5877788901329041 | loss/mean: 0.5877788901329041 | loss/std: 0.028740130696912194 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.5984090983867645 | loss/mean: 0.5984090983867645 | loss/std: 0.043409718653709 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.5877211332321167 | loss/mean: 0.5877211332321167 | loss/std: 0.028713170662343322 | lr: 0.02 | momentum: 0.9
* Epoch (18/30) 
train (19/30) loss: 0.59669

train (15/30) loss: 0.5740863025188446 | loss/mean: 0.5740863025188446 | loss/std: 0.03991454325158127 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.5734926223754883 | loss/mean: 0.5734926223754883 | loss/std: 0.040351802612442936 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.5983335673809052 | loss/mean: 0.5983335673809052 | loss/std: 0.06492153789886417 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.5828745365142822 | loss/mean: 0.5828745365142822 | loss/std: 0.03494043264201055 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.5725417256355286 | loss/mean: 0.5725417256355286 | loss/std: 0.03589351326171293 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.5738805770874024 | loss/mean: 0.5738805770874024 | loss/std: 0.03866671630687013 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.5964433789253234 | loss/mean: 0.5964433789253234 | loss/std: 0.06481220412345451 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.5837336778640747

train (15/30) loss: 0.5740863025188446 | loss/mean: 0.5740863025188446 | loss/std: 0.03991454325158127 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.5734926223754883 | loss/mean: 0.5734926223754883 | loss/std: 0.040351802612442936 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.5983335673809052 | loss/mean: 0.5983335673809052 | loss/std: 0.06492153789886417 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.5828745365142822 | loss/mean: 0.5828745365142822 | loss/std: 0.03494043264201055 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.5725417256355286 | loss/mean: 0.5725417256355286 | loss/std: 0.03589351326171293 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.5738805770874024 | loss/mean: 0.5738805770874024 | loss/std: 0.03866671630687013 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.5964433789253234 | loss/mean: 0.5964433789253234 | loss/std: 0.06481220412345451 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.5837336778640747

train (15/30) loss: 0.8998203814029693 | loss/mean: 0.8998203814029693 | loss/std: 0.10201956176304038 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.8500701189041138 | loss/mean: 0.8500701189041138 | loss/std: 0.20105256621384113 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.8895813524723053 | loss/mean: 0.8895813524723053 | loss/std: 0.09623675256818925 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.8108309745788574 | loss/mean: 0.8108309745788574 | loss/std: 0.16941940367115607 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.8133163928985596 | loss/mean: 0.8133163928985596 | loss/std: 0.04950741451169274 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.7448848724365235 | loss/mean: 0.7448848724365235 | loss/std: 0.0681252772202662 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.7467207431793212 | loss/mean: 0.7467207431793212 | loss/std: 0.03699589348727953 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.695949935913086 | 

train (15/30) loss: 0.8998203814029693 | loss/mean: 0.8998203814029693 | loss/std: 0.10201956176304038 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.8500701189041138 | loss/mean: 0.8500701189041138 | loss/std: 0.20105256621384113 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.8895813524723053 | loss/mean: 0.8895813524723053 | loss/std: 0.09623675256818925 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.8108309745788574 | loss/mean: 0.8108309745788574 | loss/std: 0.16941940367115607 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.8133163928985596 | loss/mean: 0.8133163928985596 | loss/std: 0.04950741451169274 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.7448848724365235 | loss/mean: 0.7448848724365235 | loss/std: 0.0681252772202662 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.7467207431793212 | loss/mean: 0.7467207431793212 | loss/std: 0.03699589348727953 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.695949935913086 | 

train (15/30) loss: 0.8998203814029693 | loss/mean: 0.8998203814029693 | loss/std: 0.10201956176304038 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.8500701189041138 | loss/mean: 0.8500701189041138 | loss/std: 0.20105256621384113 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.8895813524723053 | loss/mean: 0.8895813524723053 | loss/std: 0.09623675256818925 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.8108309745788574 | loss/mean: 0.8108309745788574 | loss/std: 0.16941940367115607 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.8133163928985596 | loss/mean: 0.8133163928985596 | loss/std: 0.04950741451169274 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.7448848724365235 | loss/mean: 0.7448848724365235 | loss/std: 0.0681252772202662 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.7467207431793212 | loss/mean: 0.7467207431793212 | loss/std: 0.03699589348727953 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.695949935913086 | 

train (15/30) loss: 0.8998203814029693 | loss/mean: 0.8998203814029693 | loss/std: 0.10201956176304038 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.8500701189041138 | loss/mean: 0.8500701189041138 | loss/std: 0.20105256621384113 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.8895813524723053 | loss/mean: 0.8895813524723053 | loss/std: 0.09623675256818925 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.8108309745788574 | loss/mean: 0.8108309745788574 | loss/std: 0.16941940367115607 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.8133163928985596 | loss/mean: 0.8133163928985596 | loss/std: 0.04950741451169274 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.7448848724365235 | loss/mean: 0.7448848724365235 | loss/std: 0.0681252772202662 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.7467207431793212 | loss/mean: 0.7467207431793212 | loss/std: 0.03699589348727953 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.695949935913086 | 

train (15/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.871444833278656 | loss/mean: 0.

train (15/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.871444833278656 | loss/mean: 0.

* Epoch (14/30) 
train (15/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.87144483327865

train (15/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.871444833278656 | loss/mean: 0.

train (15/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.871444833278656 | loss/mean: 0.

train (15/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.871444833278656 | loss/mean: 0.

train (15/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.871444833278656 | loss/mean: 0.

* Epoch (14/30) 
train (15/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.87144483327865

train (15/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.871444833278656 | loss/mean: 0.

train (15/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.871444833278656 | loss/mean: 0.

* Epoch (14/30) 
train (15/30) loss: 0.9014370501041412 | loss/mean: 0.9014370501041412 | loss/std: 0.10297450578847314 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.8514448642730713 | loss/mean: 0.8514448642730713 | loss/std: 0.20203054503631412 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.9014445841312408 | loss/mean: 0.9014445841312408 | loss/std: 0.10298561143635328 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.8514448642730713 | loss/mean: 0.8514448642730713 | loss/std: 0.20203054503631412 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.926877862215042 | loss/mean: 0.926877862215042 | loss/std: 0.10430124915782417 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.871444

train (15/30) loss: 0.9014370501041412 | loss/mean: 0.9014370501041412 | loss/std: 0.10297450578847314 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.8514448642730713 | loss/mean: 0.8514448642730713 | loss/std: 0.20203054503631412 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.9014445841312408 | loss/mean: 0.9014445841312408 | loss/std: 0.10298561143635328 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.8514448642730713 | loss/mean: 0.8514448642730713 | loss/std: 0.20203054503631412 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.926877862215042 | loss/mean: 0.926877862215042 | loss/std: 0.10430124915782417 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.871444833278656 | loss/

train (15/30) loss: 0.9014370501041412 | loss/mean: 0.9014370501041412 | loss/std: 0.10297450578847314 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.8514448642730713 | loss/mean: 0.8514448642730713 | loss/std: 0.20203054503631412 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.9014445841312408 | loss/mean: 0.9014445841312408 | loss/std: 0.10298561143635328 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.8514448642730713 | loss/mean: 0.8514448642730713 | loss/std: 0.20203054503631412 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.926877862215042 | loss/mean: 0.926877862215042 | loss/std: 0.10430124915782417 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.871444833278656 | loss/

train (15/30) loss: 0.9014370501041412 | loss/mean: 0.9014370501041412 | loss/std: 0.10297450578847314 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.8514448642730713 | loss/mean: 0.8514448642730713 | loss/std: 0.20203054503631412 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.9014445841312408 | loss/mean: 0.9014445841312408 | loss/std: 0.10298561143635328 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.8514448642730713 | loss/mean: 0.8514448642730713 | loss/std: 0.20203054503631412 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.926877862215042 | loss/mean: 0.926877862215042 | loss/std: 0.10430124915782417 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.871444833278656 | loss/

train (15/30) loss: 0.9014370501041412 | loss/mean: 0.9014370501041412 | loss/std: 0.10297450578847314 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.8514448642730713 | loss/mean: 0.8514448642730713 | loss/std: 0.20203054503631412 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.9014445841312408 | loss/mean: 0.9014445841312408 | loss/std: 0.10298561143635328 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.8514448642730713 | loss/mean: 0.8514448642730713 | loss/std: 0.20203054503631412 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.926877862215042 | loss/mean: 0.926877862215042 | loss/std: 0.10430124915782417 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.871444833278656 | loss/

train (15/30) loss: 0.624902230501175 | loss/mean: 0.624902230501175 | loss/std: 0.03497773355641165 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.612080192565918 | loss/mean: 0.612080192565918 | loss/std: 0.0342475182130589 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.6209995150566101 | loss/mean: 0.6209995150566101 | loss/std: 0.034692741754944884 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.6088853478431702 | loss/mean: 0.6088853478431702 | loss/std: 0.03365972159950718 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.6177657246589661 | loss/mean: 0.6177657246589661 | loss/std: 0.034529156826785215 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.6062272548675537 | loss/mean: 0.6062272548675537 | loss/std: 0.03313798134110008 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.614932382106781 | loss/mean: 0.614932382106781 | loss/std: 0.03434630040305663 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.6039676547050477 | los

train (15/30) loss: 0.5773479819297791 | loss/mean: 0.5773479819297791 | loss/std: 0.03951588503520675 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.5781537055969238 | loss/mean: 0.5781537055969238 | loss/std: 0.03914906065213559 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.6001749277114868 | loss/mean: 0.6001749277114868 | loss/std: 0.06436695629085154 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.587703537940979 | loss/mean: 0.587703537940979 | loss/std: 0.03899848721621439 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.5791235446929932 | loss/mean: 0.5791235446929932 | loss/std: 0.03194940111810289 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.576936674118042 | loss/mean: 0.576936674118042 | loss/std: 0.03959515571071343 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.6092969834804536 | loss/mean: 0.6092969834804536 | loss/std: 0.07030257288107046 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.5874293088912964 | lo

train (15/30) loss: 0.5773479819297791 | loss/mean: 0.5773479819297791 | loss/std: 0.03951588503520675 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.5781537055969238 | loss/mean: 0.5781537055969238 | loss/std: 0.03914906065213559 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.6001749277114868 | loss/mean: 0.6001749277114868 | loss/std: 0.06436695629085154 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.587703537940979 | loss/mean: 0.587703537940979 | loss/std: 0.03899848721621439 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.5791235446929932 | loss/mean: 0.5791235446929932 | loss/std: 0.03194940111810289 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.576936674118042 | loss/mean: 0.576936674118042 | loss/std: 0.03959515571071343 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.6092969834804536 | loss/mean: 0.6092969834804536 | loss/std: 0.07030257288107046 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.5874293088912964 | lo

train (15/30) loss: 0.6047777950763703 | loss/mean: 0.6047777950763703 | loss/std: 0.06258539688910864 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.5932613134384155 | loss/mean: 0.5932613134384155 | loss/std: 0.03555474245036382 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.5763126611709595 | loss/mean: 0.5763126611709595 | loss/std: 0.03273749876267212 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.5775428771972656 | loss/mean: 0.5775428771972656 | loss/std: 0.03708431189240612 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.5984833896160126 | loss/mean: 0.5984833896160126 | loss/std: 0.06241599630816807 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.5751792311668396 | loss/mean: 0.5751792311668396 | loss/std: 0.03828745167088464 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.613615196943283 | loss/mean: 0.613615196943283 | loss/std: 0.06489315607666174 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.6119322896003723 | 

train (15/30) loss: 0.6047777950763703 | loss/mean: 0.6047777950763703 | loss/std: 0.06258539688910864 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.5932613134384155 | loss/mean: 0.5932613134384155 | loss/std: 0.03555474245036382 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.5763126611709595 | loss/mean: 0.5763126611709595 | loss/std: 0.03273749876267212 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.5775428771972656 | loss/mean: 0.5775428771972656 | loss/std: 0.03708431189240612 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.5984833896160126 | loss/mean: 0.5984833896160126 | loss/std: 0.06241599630816807 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.5751792311668396 | loss/mean: 0.5751792311668396 | loss/std: 0.03828745167088464 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.613615196943283 | loss/mean: 0.613615196943283 | loss/std: 0.06489315607666174 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.6119322896003723 | 

train (15/30) loss: 0.5994187891483306 | loss/mean: 0.5994187891483306 | loss/std: 0.04138264755712946 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.5859010219573975 | loss/mean: 0.5859010219573975 | loss/std: 0.026955276551652713 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.5987054824829101 | loss/mean: 0.5987054824829101 | loss/std: 0.04328864440382823 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.5856449961662292 | loss/mean: 0.5856449961662292 | loss/std: 0.026977123528192676 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.5975082397460938 | loss/mean: 0.5975082397460938 | loss/std: 0.04025276307548556 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.5847720384597779 | loss/mean: 0.5847720384597779 | loss/std: 0.026898857613995315 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.5973440706729889 | loss/mean: 0.5973440706729889 | loss/std: 0.046132233823813366 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.5850626826286

train (15/30) loss: 0.6061451494693756 | loss/mean: 0.6061451494693756 | loss/std: 0.05582642748492772 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.5765239238739014 | loss/mean: 0.5765239238739014 | loss/std: 0.050445061962586926 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.7963122189044952 | loss/mean: 0.7963122189044952 | loss/std: 0.15164286309042685 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.851168429851532 | loss/mean: 0.851168429851532 | loss/std: 0.20188831651215303 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.8615246117115022 | loss/mean: 0.8615246117115022 | loss/std: 0.08343874819079175 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.8294395923614502 | loss/mean: 0.8294395923614502 | loss/std: 0.19169746749422495 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.6824403285980225 | loss/mean: 0.6824403285980225 | loss/std: 0.1002762328354555 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.6211643934249877 | 

train (15/30) loss: 0.6061451494693756 | loss/mean: 0.6061451494693756 | loss/std: 0.05582642748492772 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.5765239238739014 | loss/mean: 0.5765239238739014 | loss/std: 0.050445061962586926 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.7963122189044952 | loss/mean: 0.7963122189044952 | loss/std: 0.15164286309042685 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.851168429851532 | loss/mean: 0.851168429851532 | loss/std: 0.20188831651215303 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.8615246117115022 | loss/mean: 0.8615246117115022 | loss/std: 0.08343874819079175 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.8294395923614502 | loss/mean: 0.8294395923614502 | loss/std: 0.19169746749422495 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.6824403285980225 | loss/mean: 0.6824403285980225 | loss/std: 0.1002762328354555 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.6211643934249877 | 

train (15/30) loss: 0.6061451494693756 | loss/mean: 0.6061451494693756 | loss/std: 0.05582642748492772 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.5765239238739014 | loss/mean: 0.5765239238739014 | loss/std: 0.050445061962586926 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.7963122189044952 | loss/mean: 0.7963122189044952 | loss/std: 0.15164286309042685 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.851168429851532 | loss/mean: 0.851168429851532 | loss/std: 0.20188831651215303 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.8615246117115022 | loss/mean: 0.8615246117115022 | loss/std: 0.08343874819079175 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.8294395923614502 | loss/mean: 0.8294395923614502 | loss/std: 0.19169746749422495 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.6824403285980225 | loss/mean: 0.6824403285980225 | loss/std: 0.1002762328354555 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.6211643934249877 | 

* Epoch (14/30) 
train (15/30) loss: 0.6061451494693756 | loss/mean: 0.6061451494693756 | loss/std: 0.05582642748492772 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.5765239238739014 | loss/mean: 0.5765239238739014 | loss/std: 0.050445061962586926 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.7963122189044952 | loss/mean: 0.7963122189044952 | loss/std: 0.15164286309042685 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.851168429851532 | loss/mean: 0.851168429851532 | loss/std: 0.20188831651215303 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.8615246117115022 | loss/mean: 0.8615246117115022 | loss/std: 0.08343874819079175 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.8294395923614502 | loss/mean: 0.8294395923614502 | loss/std: 0.19169746749422495 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.6824403285980225 | loss/mean: 0.6824403285980225 | loss/std: 0.1002762328354555 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.62

train (15/30) loss: 0.6061451494693756 | loss/mean: 0.6061451494693756 | loss/std: 0.05582642748492772 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.5765239238739014 | loss/mean: 0.5765239238739014 | loss/std: 0.050445061962586926 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.7963122189044952 | loss/mean: 0.7963122189044952 | loss/std: 0.15164286309042685 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.851168429851532 | loss/mean: 0.851168429851532 | loss/std: 0.20188831651215303 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.8615246117115022 | loss/mean: 0.8615246117115022 | loss/std: 0.08343874819079175 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.8294395923614502 | loss/mean: 0.8294395923614502 | loss/std: 0.19169746749422495 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.6824403285980225 | loss/mean: 0.6824403285980225 | loss/std: 0.1002762328354555 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.6211643934249877 | 

train (15/30) loss: 0.9824942052364349 | loss/mean: 0.9824942052364349 | loss/std: 0.16126615217228396 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 1.090620720386505 | loss/mean: 1.090620720386505 | loss/std: 0.14181210840573852 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.9216438829898834 | loss/mean: 0.9216438829898834 | loss/std: 0.18537578410742536 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.9708919048309326 | loss/mean: 0.9708919048309326 | loss/std: 0.17348282969985704 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.8945739328861236 | loss/mean: 0.8945739328861236 | loss/std: 0.12372608340605995 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 1.1514577746391297 | loss/mean: 1.1514577746391297 | loss/std: 0.16902984117129136 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 1.1299295604228974 | loss/mean: 1.1299295604228974 | loss/std: 0.11754536604446966 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 1.1516458153724671 | 

train (15/30) loss: 0.9824942052364349 | loss/mean: 0.9824942052364349 | loss/std: 0.16126615217228396 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 1.090620720386505 | loss/mean: 1.090620720386505 | loss/std: 0.14181210840573852 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.9216438829898834 | loss/mean: 0.9216438829898834 | loss/std: 0.18537578410742536 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.9708919048309326 | loss/mean: 0.9708919048309326 | loss/std: 0.17348282969985704 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.8945739328861236 | loss/mean: 0.8945739328861236 | loss/std: 0.12372608340605995 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 1.1514577746391297 | loss/mean: 1.1514577746391297 | loss/std: 0.16902984117129136 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 1.1299295604228974 | loss/mean: 1.1299295604228974 | loss/std: 0.11754536604446966 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 1.1516458153724671 | 

* Epoch (14/30) 
train (15/30) loss: 0.9824942052364349 | loss/mean: 0.9824942052364349 | loss/std: 0.16126615217228396 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 1.090620720386505 | loss/mean: 1.090620720386505 | loss/std: 0.14181210840573852 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.9216438829898834 | loss/mean: 0.9216438829898834 | loss/std: 0.18537578410742536 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.9708919048309326 | loss/mean: 0.9708919048309326 | loss/std: 0.17348282969985704 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.8945739328861236 | loss/mean: 0.8945739328861236 | loss/std: 0.12372608340605995 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 1.1514577746391297 | loss/mean: 1.1514577746391297 | loss/std: 0.16902984117129136 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 1.1299295604228974 | loss/mean: 1.1299295604228974 | loss/std: 0.11754536604446966 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 1.15

train (15/30) loss: 0.9824942052364349 | loss/mean: 0.9824942052364349 | loss/std: 0.16126615217228396 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 1.090620720386505 | loss/mean: 1.090620720386505 | loss/std: 0.14181210840573852 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.9216438829898834 | loss/mean: 0.9216438829898834 | loss/std: 0.18537578410742536 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.9708919048309326 | loss/mean: 0.9708919048309326 | loss/std: 0.17348282969985704 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.8945739328861236 | loss/mean: 0.8945739328861236 | loss/std: 0.12372608340605995 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 1.1514577746391297 | loss/mean: 1.1514577746391297 | loss/std: 0.16902984117129136 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 1.1299295604228974 | loss/mean: 1.1299295604228974 | loss/std: 0.11754536604446966 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 1.1516458153724671 | 

train (15/30) loss: 0.9824942052364349 | loss/mean: 0.9824942052364349 | loss/std: 0.16126615217228396 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 1.090620720386505 | loss/mean: 1.090620720386505 | loss/std: 0.14181210840573852 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.9216438829898834 | loss/mean: 0.9216438829898834 | loss/std: 0.18537578410742536 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.9708919048309326 | loss/mean: 0.9708919048309326 | loss/std: 0.17348282969985704 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.8945739328861236 | loss/mean: 0.8945739328861236 | loss/std: 0.12372608340605995 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 1.1514577746391297 | loss/mean: 1.1514577746391297 | loss/std: 0.16902984117129136 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 1.1299295604228974 | loss/mean: 1.1299295604228974 | loss/std: 0.11754536604446966 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 1.1516458153724671 | 

train (15/30) loss: 0.5759882807731629 | loss/mean: 0.5759882807731629 | loss/std: 0.03227368098673879 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.5764612674713134 | loss/mean: 0.5764612674713134 | loss/std: 0.03498291199754339 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.5962968766689299 | loss/mean: 0.5962968766689299 | loss/std: 0.06468512854681005 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.5805328726768494 | loss/mean: 0.5805328726768494 | loss/std: 0.03413309757304231 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.5720474481582641 | loss/mean: 0.5720474481582641 | loss/std: 0.03153611982039933 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.5736502885818482 | loss/mean: 0.5736502885818482 | loss/std: 0.03813604236969019 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.5986849665641786 | loss/mean: 0.5986849665641786 | loss/std: 0.06572629494313283 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.5855064034461975 

train (15/30) loss: 0.5759882807731629 | loss/mean: 0.5759882807731629 | loss/std: 0.03227368098673879 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.5764612674713134 | loss/mean: 0.5764612674713134 | loss/std: 0.03498291199754339 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.5962968766689299 | loss/mean: 0.5962968766689299 | loss/std: 0.06468512854681005 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.5805328726768494 | loss/mean: 0.5805328726768494 | loss/std: 0.03413309757304231 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.5720474481582641 | loss/mean: 0.5720474481582641 | loss/std: 0.03153611982039933 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.5736502885818482 | loss/mean: 0.5736502885818482 | loss/std: 0.03813604236969019 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.5986849665641786 | loss/mean: 0.5986849665641786 | loss/std: 0.06572629494313283 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.5855064034461975 

train (15/30) loss: 0.5837002754211426 | loss/mean: 0.5837002754211426 | loss/std: 0.05198978119491697 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.5745816469192505 | loss/mean: 0.5745816469192505 | loss/std: 0.04188083699925745 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.6027324974536897 | loss/mean: 0.6027324974536897 | loss/std: 0.06439375188506526 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.5883761286735535 | loss/mean: 0.5883761286735535 | loss/std: 0.037954241055976855 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.5788886964321137 | loss/mean: 0.5788886964321137 | loss/std: 0.03266012959734287 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.5775530934333801 | loss/mean: 0.5775530934333801 | loss/std: 0.036903470051017816 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.5980209112167358 | loss/mean: 0.5980209112167358 | loss/std: 0.06494686515660321 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.58349050283432 

train (15/30) loss: 0.5837002754211426 | loss/mean: 0.5837002754211426 | loss/std: 0.05198978119491697 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.5745816469192505 | loss/mean: 0.5745816469192505 | loss/std: 0.04188083699925745 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.6027324974536897 | loss/mean: 0.6027324974536897 | loss/std: 0.06439375188506526 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.5883761286735535 | loss/mean: 0.5883761286735535 | loss/std: 0.037954241055976855 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.5788886964321137 | loss/mean: 0.5788886964321137 | loss/std: 0.03266012959734287 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.5775530934333801 | loss/mean: 0.5775530934333801 | loss/std: 0.036903470051017816 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.5980209112167358 | loss/mean: 0.5980209112167358 | loss/std: 0.06494686515660321 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.58349050283432 

train (15/30) loss: 0.5837002754211426 | loss/mean: 0.5837002754211426 | loss/std: 0.05198978119491697 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.5745816469192505 | loss/mean: 0.5745816469192505 | loss/std: 0.04188083699925745 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.6027324974536897 | loss/mean: 0.6027324974536897 | loss/std: 0.06439375188506526 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.5883761286735535 | loss/mean: 0.5883761286735535 | loss/std: 0.037954241055976855 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.5788886964321137 | loss/mean: 0.5788886964321137 | loss/std: 0.03266012959734287 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.5775530934333801 | loss/mean: 0.5775530934333801 | loss/std: 0.036903470051017816 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.5980209112167358 | loss/mean: 0.5980209112167358 | loss/std: 0.06494686515660321 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.58349050283432 

train (15/30) loss: 0.5784415721893311 | loss/mean: 0.5784415721893311 | loss/std: 0.04738600822109546 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.5743316769599914 | loss/mean: 0.5743316769599914 | loss/std: 0.04222269162736776 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.6013394117355345 | loss/mean: 0.6013394117355345 | loss/std: 0.06508338355996454 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.5829010725021362 | loss/mean: 0.5829010725021362 | loss/std: 0.040142634627291875 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.5819371879100799 | loss/mean: 0.5819371879100799 | loss/std: 0.030679400031737584 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.5760425925254822 | loss/mean: 0.5760425925254822 | loss/std: 0.039303194365406496 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.616763460636139 | loss/mean: 0.616763460636139 | loss/std: 0.0727019142125854 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.5890289306640625 

train (15/30) loss: 0.5784415721893311 | loss/mean: 0.5784415721893311 | loss/std: 0.04738600822109546 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.5743316769599914 | loss/mean: 0.5743316769599914 | loss/std: 0.04222269162736776 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.6013394117355345 | loss/mean: 0.6013394117355345 | loss/std: 0.06508338355996454 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.5829010725021362 | loss/mean: 0.5829010725021362 | loss/std: 0.040142634627291875 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.5819371879100799 | loss/mean: 0.5819371879100799 | loss/std: 0.030679400031737584 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.5760425925254822 | loss/mean: 0.5760425925254822 | loss/std: 0.039303194365406496 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.616763460636139 | loss/mean: 0.616763460636139 | loss/std: 0.0727019142125854 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.5890289306640625 

train (15/30) loss: 0.5784415721893311 | loss/mean: 0.5784415721893311 | loss/std: 0.04738600822109546 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.5743316769599914 | loss/mean: 0.5743316769599914 | loss/std: 0.04222269162736776 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.6013394117355345 | loss/mean: 0.6013394117355345 | loss/std: 0.06508338355996454 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.5829010725021362 | loss/mean: 0.5829010725021362 | loss/std: 0.040142634627291875 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.5819371879100799 | loss/mean: 0.5819371879100799 | loss/std: 0.030679400031737584 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.5760425925254822 | loss/mean: 0.5760425925254822 | loss/std: 0.039303194365406496 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.616763460636139 | loss/mean: 0.616763460636139 | loss/std: 0.0727019142125854 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.5890289306640625 

train (15/30) loss: 0.598220670223236 | loss/mean: 0.598220670223236 | loss/std: 0.06398814495617283 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.5899559020996094 | loss/mean: 0.5899559020996094 | loss/std: 0.03604295630924886 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.57574502825737 | loss/mean: 0.57574502825737 | loss/std: 0.03081015807965006 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.5763995051383972 | loss/mean: 0.5763995051383972 | loss/std: 0.03837792699124953 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.6031041443347931 | loss/mean: 0.6031041443347931 | loss/std: 0.06813155916244738 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.5879887342453003 | loss/mean: 0.5879887342453003 | loss/std: 0.037362155514783585 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.5734049379825592 | loss/mean: 0.5734049379825592 | loss/std: 0.0353238780931436 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.5745045900344848 | loss

train (15/30) loss: 0.598220670223236 | loss/mean: 0.598220670223236 | loss/std: 0.06398814495617283 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.5899559020996094 | loss/mean: 0.5899559020996094 | loss/std: 0.03604295630924886 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.57574502825737 | loss/mean: 0.57574502825737 | loss/std: 0.03081015807965006 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.5763995051383972 | loss/mean: 0.5763995051383972 | loss/std: 0.03837792699124953 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.6031041443347931 | loss/mean: 0.6031041443347931 | loss/std: 0.06813155916244738 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.5879887342453003 | loss/mean: 0.5879887342453003 | loss/std: 0.037362155514783585 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.5734049379825592 | loss/mean: 0.5734049379825592 | loss/std: 0.0353238780931436 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.5745045900344848 | loss

train (15/30) loss: 0.6851683974266052 | loss/mean: 0.6851683974266052 | loss/std: 0.11276370604079128 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.6282920360565185 | loss/mean: 0.6282920360565185 | loss/std: 0.07290204293038144 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.6557808697223663 | loss/mean: 0.6557808697223663 | loss/std: 0.11425730044693222 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.5782842636108398 | loss/mean: 0.5782842636108398 | loss/std: 0.04853697598153971 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.6280758082866669 | loss/mean: 0.6280758082866669 | loss/std: 0.07363829579593353 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.5785884022712707 | loss/mean: 0.5785884022712707 | loss/std: 0.031107004645365827 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.5956905364990234 | loss/mean: 0.5956905364990234 | loss/std: 0.08610226871400462 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.5961543917655945

train (15/30) loss: 0.6851683974266052 | loss/mean: 0.6851683974266052 | loss/std: 0.11276370604079128 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.6282920360565185 | loss/mean: 0.6282920360565185 | loss/std: 0.07290204293038144 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.6557808697223663 | loss/mean: 0.6557808697223663 | loss/std: 0.11425730044693222 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.5782842636108398 | loss/mean: 0.5782842636108398 | loss/std: 0.04853697598153971 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.6280758082866669 | loss/mean: 0.6280758082866669 | loss/std: 0.07363829579593353 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.5785884022712707 | loss/mean: 0.5785884022712707 | loss/std: 0.031107004645365827 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.5956905364990234 | loss/mean: 0.5956905364990234 | loss/std: 0.08610226871400462 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.5961543917655945

train (15/30) loss: 0.6851683974266052 | loss/mean: 0.6851683974266052 | loss/std: 0.11276370604079128 | lr: 0.02 | momentum: 0.9
valid (15/30) loss: 0.6282920360565185 | loss/mean: 0.6282920360565185 | loss/std: 0.07290204293038144 | lr: 0.02 | momentum: 0.9
* Epoch (15/30) 
train (16/30) loss: 0.6557808697223663 | loss/mean: 0.6557808697223663 | loss/std: 0.11425730044693222 | lr: 0.02 | momentum: 0.9
valid (16/30) loss: 0.5782842636108398 | loss/mean: 0.5782842636108398 | loss/std: 0.04853697598153971 | lr: 0.02 | momentum: 0.9
* Epoch (16/30) 
train (17/30) loss: 0.6280758082866669 | loss/mean: 0.6280758082866669 | loss/std: 0.07363829579593353 | lr: 0.02 | momentum: 0.9
valid (17/30) loss: 0.5785884022712707 | loss/mean: 0.5785884022712707 | loss/std: 0.031107004645365827 | lr: 0.02 | momentum: 0.9
* Epoch (17/30) 
train (18/30) loss: 0.5956905364990234 | loss/mean: 0.5956905364990234 | loss/std: 0.08610226871400462 | lr: 0.02 | momentum: 0.9
valid (18/30) loss: 0.5961543917655945

FileNotFoundError: Missing file 'logs/trial_2_0/last_full.pth'!

# Searching neural architectures by using Bayesian hyperparameter optimization

In [54]:
from sklearn.gaussian_process import GaussianProcessRegressor

In [55]:
def get_bayesian_optimization_input_features(layer_configurations):
    bo_input_features = []
    for layer_configuration in layer_configurations:
        bo_input_feature = layer_configuration + [0] * (6 - len(layer_configuration))
        bo_input_features.append(bo_input_feature)
    return bo_input_features

In [56]:
number_of_configurations = [100, 2000]
epochs_per_conf = 15
topk_models = 5


In [58]:
trial_numbers = np.array(
    range(len(layer_configurations))
)
trial_number = 0
model = None
best_valid_losses_per_iteration = []
best_configurations_per_iteration = []
overall_bo_input_features = []
overall_bo_valid_losses = []
for number_of_configuration in number_of_configurations:
    valid_losses = []
    layer_configurations = get_random_configurations(
        number_of_configuration, rng
    )
    if model:
        bo_input_features = get_bayesian_optimization_input_features(layer_configurations)
        predicted_valid_losses = model.predict(
            bo_input_features
        )
        top_k_idx = np.argsort(
            predicted_valid_losses
        )[:topk_models]
        layer_configurations = layer_configurations[
            top_k_idx
        ]
    for idx, layer_configuration in enumerate(layer_configurations):
        trial_identifier = 'bo_{}'.format(trial_number)
        valid_loss = train_and_evaluate_mlp(
            trial_number, 
            input_layer_size=iris_input_dataset.shape[1], 
            output_layer_size=3,
            layer_configuration=layer_configuration,
            epochs=epochs_per_conf,
            load_on_stage_start=False,
            best_or_last='best'
        )
        valid_losses.append(valid_loss)
        trial_number += 1 
    best_loss_idx = np.argmin(valid_losses)
    best_valid_losses_per_iteration.append(
        valid_losses[best_loss_idx]
    )
    best_configurations_per_iteration.append(
        layer_configurations[best_loss_idx]
    )
    bo_input_features = get_bayesian_optimization_input_features(layer_configurations)
    overall_bo_input_features.extend(bo_input_features)
    overall_bo_valid_losses.extend(valid_losses)
    model = GaussianProcessRegressor()
    model.fit(
        overall_bo_input_features, 
        overall_bo_valid_losses
    )


  layer_configurations = np.array(layer_configurations)
  x = F.softmax(x)


train (1/15) loss: 1.1105597257614135 | loss/mean: 1.1105597257614135 | loss/std: 0.026692359348913938 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 1.0984391450881958 | loss/mean: 1.0984391450881958 | loss/std: 0.01435408310852528 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 1.0965941190719606 | loss/mean: 1.0965941190719606 | loss/std: 0.004006419870531935 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 1.1002785921096803 | loss/mean: 1.1002785921096803 | loss/std: 0.006304637312494726 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 1.0976936101913455 | loss/mean: 1.0976936101913455 | loss/std: 0.00675443826719348 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 1.101915717124939 | loss/mean: 1.101915717124939 | loss/std: 0.011454164809258968 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 1.0979388594627382 | loss/mean: 1.0979388594627382 | loss/std: 0.009922413939067985 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 1.1034156322479247 | loss/m

train (1/15) loss: 1.1161424040794372 | loss/mean: 1.1161424040794372 | loss/std: 0.0345314736577223 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 1.0982954502105713 | loss/mean: 1.0982954502105713 | loss/std: 0.01674794163506465 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 1.096289575099945 | loss/mean: 1.096289575099945 | loss/std: 0.005365734867303785 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 1.1011442422866822 | loss/mean: 1.1011442422866822 | loss/std: 0.010008415581770297 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 1.097359549999237 | loss/mean: 1.097359549999237 | loss/std: 0.007264686267846628 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 1.1023531198501586 | loss/mean: 1.1023531198501586 | loss/std: 0.01342612500355667 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 1.0974648118019104 | loss/mean: 1.0974648118019104 | loss/std: 0.009157827212389606 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 1.1028610467910767 | loss/mean:

* Epoch (15/15) 
Top best models:
logs/trial_3/train.1.pth	1.0983
train (1/15) loss: 1.0001943647861482 | loss/mean: 1.0001943647861482 | loss/std: 0.08788550875166115 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 0.809182059764862 | loss/mean: 0.809182059764862 | loss/std: 0.13670873785775836 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.7989143967628479 | loss/mean: 0.7989143967628479 | loss/std: 0.057386516439031594 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.6735683917999268 | loss/mean: 0.6735683917999268 | loss/std: 0.06531871852058117 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.6491065323352814 | loss/mean: 0.6491065323352814 | loss/std: 0.04017658598129999 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.6028829336166381 | loss/mean: 0.6028829336166381 | loss/std: 0.05846507625450368 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.6086212754249574 | loss/mean: 0.6086212754249574 | loss/std: 0.0517481148620719 | lr: 0.02 |

valid (15/15) loss: 0.587175726890564 | loss/mean: 0.587175726890564 | loss/std: 0.04325511093497118 | lr: 0.02 | momentum: 0.9
* Epoch (15/15) 
Top best models:
logs/trial_5/train.14.pth	0.5733
train (1/15) loss: 1.0001943647861482 | loss/mean: 1.0001943647861482 | loss/std: 0.08788550875166115 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 0.809182059764862 | loss/mean: 0.809182059764862 | loss/std: 0.13670873785775836 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.7989143967628479 | loss/mean: 0.7989143967628479 | loss/std: 0.057386516439031594 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.6735683917999268 | loss/mean: 0.6735683917999268 | loss/std: 0.06531871852058117 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.6491065323352814 | loss/mean: 0.6491065323352814 | loss/std: 0.04017658598129999 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.6028829336166381 | loss/mean: 0.6028829336166381 | loss/std: 0.05846507625450368 | lr: 0.02 | momentum: 0.9


train (15/15) loss: 0.8914509117603302 | loss/mean: 0.8914509117603302 | loss/std: 0.13632496178411654 | lr: 0.02 | momentum: 0.9
valid (15/15) loss: 0.8714449524879455 | loss/mean: 0.8714449524879455 | loss/std: 0.17379319499364249 | lr: 0.02 | momentum: 0.9
* Epoch (15/15) 
Top best models:
logs/trial_7/train.9.pth	0.5925
train (1/15) loss: 1.0845669031143188 | loss/mean: 1.0845669031143188 | loss/std: 0.024143328804098373 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 0.9404824852943421 | loss/mean: 0.9404824852943421 | loss/std: 0.0801234202484514 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.9004479050636292 | loss/mean: 0.9004479050636292 | loss/std: 0.07105756797964828 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.8055326819419861 | loss/mean: 0.8055326819419861 | loss/std: 0.07997661012966277 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.7798250317573547 | loss/mean: 0.7798250317573547 | loss/std: 0.06259161344909744 | lr: 0.02 | momentum: 0

* Epoch (14/15) 
train (15/15) loss: 0.8914509117603302 | loss/mean: 0.8914509117603302 | loss/std: 0.13632496178411654 | lr: 0.02 | momentum: 0.9
valid (15/15) loss: 0.8714449524879455 | loss/mean: 0.8714449524879455 | loss/std: 0.17379319499364249 | lr: 0.02 | momentum: 0.9
* Epoch (15/15) 
Top best models:
logs/trial_9/train.9.pth	0.5925
train (1/15) loss: 1.0845669031143188 | loss/mean: 1.0845669031143188 | loss/std: 0.024143328804098373 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 0.9404824852943421 | loss/mean: 0.9404824852943421 | loss/std: 0.0801234202484514 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.9004479050636292 | loss/mean: 0.9004479050636292 | loss/std: 0.07105756797964828 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.8055326819419861 | loss/mean: 0.8055326819419861 | loss/std: 0.07997661012966277 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.7798250317573547 | loss/mean: 0.7798250317573547 | loss/std: 0.06259161344909744 | lr: 0

valid (14/15) loss: 0.5812777400016784 | loss/mean: 0.5812777400016784 | loss/std: 0.036279075626241365 | lr: 0.02 | momentum: 0.9
* Epoch (14/15) 
train (15/15) loss: 0.606410539150238 | loss/mean: 0.606410539150238 | loss/std: 0.06978728906039992 | lr: 0.02 | momentum: 0.9
valid (15/15) loss: 0.5767632126808167 | loss/mean: 0.5767632126808167 | loss/std: 0.03382747166437363 | lr: 0.02 | momentum: 0.9
* Epoch (15/15) 
Top best models:
logs/trial_11/train.15.pth	0.5768
train (1/15) loss: 1.0448597967624664 | loss/mean: 1.0448597967624664 | loss/std: 0.060585461161322206 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 0.9401585817337036 | loss/mean: 0.9401585817337036 | loss/std: 0.07286703816757384 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.8367466807365418 | loss/mean: 0.8367466807365418 | loss/std: 0.06619481424218619 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.7191599130630493 | loss/mean: 0.7191599130630493 | loss/std: 0.07242234444374177 | lr: 0.02 | momentu

train (14/15) loss: 0.5921071767807007 | loss/mean: 0.5921071767807007 | loss/std: 0.05394942290981413 | lr: 0.02 | momentum: 0.9
valid (14/15) loss: 0.5812777400016784 | loss/mean: 0.5812777400016784 | loss/std: 0.036279075626241365 | lr: 0.02 | momentum: 0.9
* Epoch (14/15) 
train (15/15) loss: 0.606410539150238 | loss/mean: 0.606410539150238 | loss/std: 0.06978728906039992 | lr: 0.02 | momentum: 0.9
valid (15/15) loss: 0.5767632126808167 | loss/mean: 0.5767632126808167 | loss/std: 0.03382747166437363 | lr: 0.02 | momentum: 0.9
* Epoch (15/15) 
Top best models:
logs/trial_13/train.15.pth	0.5768
train (1/15) loss: 0.9704628169536591 | loss/mean: 0.9704628169536591 | loss/std: 0.11796628651193096 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 0.7479287385940552 | loss/mean: 0.7479287385940552 | loss/std: 0.12062336850883595 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.6871814012527465 | loss/mean: 0.6871814012527465 | loss/std: 0.10447908459638602 | lr: 0.02 | momentu

* Epoch (13/15) 
train (14/15) loss: 0.7153805136680603 | loss/mean: 0.7153805136680603 | loss/std: 0.12273872735281582 | lr: 0.02 | momentum: 0.9
valid (14/15) loss: 0.5716752767562866 | loss/mean: 0.5716752767562866 | loss/std: 0.03995584018131111 | lr: 0.02 | momentum: 0.9
* Epoch (14/15) 
train (15/15) loss: 0.5912322819232941 | loss/mean: 0.5912322819232941 | loss/std: 0.06607450906679667 | lr: 0.02 | momentum: 0.9
valid (15/15) loss: 0.5930710792541504 | loss/mean: 0.5930710792541504 | loss/std: 0.04455761981853796 | lr: 0.02 | momentum: 0.9
* Epoch (15/15) 
Top best models:
logs/trial_15/train.14.pth	0.5717
train (1/15) loss: 0.9704628169536591 | loss/mean: 0.9704628169536591 | loss/std: 0.11796628651193096 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 0.7479287385940552 | loss/mean: 0.7479287385940552 | loss/std: 0.12062336850883595 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.6871814012527465 | loss/mean: 0.6871814012527465 | loss/std: 0.10447908459638602 | 

valid (13/15) loss: 0.8037644624710083 | loss/mean: 0.8037644624710083 | loss/std: 0.07491341882540532 | lr: 0.02 | momentum: 0.9
* Epoch (13/15) 
train (14/15) loss: 0.8330515325069426 | loss/mean: 0.8330515325069426 | loss/std: 0.060400894516281806 | lr: 0.02 | momentum: 0.9
valid (14/15) loss: 0.8037675976753235 | loss/mean: 0.8037675976753235 | loss/std: 0.07491526928430203 | lr: 0.02 | momentum: 0.9
* Epoch (14/15) 
train (15/15) loss: 0.8330336332321168 | loss/mean: 0.8330336332321168 | loss/std: 0.0603860639420478 | lr: 0.02 | momentum: 0.9
valid (15/15) loss: 0.803777027130127 | loss/mean: 0.803777027130127 | loss/std: 0.07490257262162353 | lr: 0.02 | momentum: 0.9
* Epoch (15/15) 
Top best models:
logs/trial_17/train.4.pth	0.8032
train (1/15) loss: 1.0752697706222534 | loss/mean: 1.0752697706222534 | loss/std: 0.04411566048713155 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 0.8940818190574646 | loss/mean: 0.8940818190574646 | loss/std: 0.10169356699513812 | lr: 0.02 | momentu

* Epoch (12/15) 
train (13/15) loss: 0.8330882132053375 | loss/mean: 0.8330882132053375 | loss/std: 0.06042292226337011 | lr: 0.02 | momentum: 0.9
valid (13/15) loss: 0.8037644624710083 | loss/mean: 0.8037644624710083 | loss/std: 0.07491341882540532 | lr: 0.02 | momentum: 0.9
* Epoch (13/15) 
train (14/15) loss: 0.8330515325069426 | loss/mean: 0.8330515325069426 | loss/std: 0.060400894516281806 | lr: 0.02 | momentum: 0.9
valid (14/15) loss: 0.8037675976753235 | loss/mean: 0.8037675976753235 | loss/std: 0.07491526928430203 | lr: 0.02 | momentum: 0.9
* Epoch (14/15) 
train (15/15) loss: 0.8330336332321168 | loss/mean: 0.8330336332321168 | loss/std: 0.0603860639420478 | lr: 0.02 | momentum: 0.9
valid (15/15) loss: 0.803777027130127 | loss/mean: 0.803777027130127 | loss/std: 0.07490257262162353 | lr: 0.02 | momentum: 0.9
* Epoch (15/15) 
Top best models:
logs/trial_19/train.4.pth	0.8032
train (1/15) loss: 1.0752697706222534 | loss/mean: 1.0752697706222534 | loss/std: 0.04411566048713155 | 

valid (12/15) loss: 0.8037532567977905 | loss/mean: 0.8037532567977905 | loss/std: 0.07492486172818574 | lr: 0.02 | momentum: 0.9
* Epoch (12/15) 
train (13/15) loss: 0.8330882132053375 | loss/mean: 0.8330882132053375 | loss/std: 0.06042292226337011 | lr: 0.02 | momentum: 0.9
valid (13/15) loss: 0.8037644624710083 | loss/mean: 0.8037644624710083 | loss/std: 0.07491341882540532 | lr: 0.02 | momentum: 0.9
* Epoch (13/15) 
train (14/15) loss: 0.8330515325069426 | loss/mean: 0.8330515325069426 | loss/std: 0.060400894516281806 | lr: 0.02 | momentum: 0.9
valid (14/15) loss: 0.8037675976753235 | loss/mean: 0.8037675976753235 | loss/std: 0.07491526928430203 | lr: 0.02 | momentum: 0.9
* Epoch (14/15) 
train (15/15) loss: 0.8330336332321168 | loss/mean: 0.8330336332321168 | loss/std: 0.0603860639420478 | lr: 0.02 | momentum: 0.9
valid (15/15) loss: 0.803777027130127 | loss/mean: 0.803777027130127 | loss/std: 0.07490257262162353 | lr: 0.02 | momentum: 0.9
* Epoch (15/15) 
Top best models:
logs/tr

train (12/15) loss: 0.9014447987079621 | loss/mean: 0.9014447987079621 | loss/std: 0.10298572250896403 | lr: 0.02 | momentum: 0.9
valid (12/15) loss: 0.8514448642730713 | loss/mean: 0.8514448642730713 | loss/std: 0.20203054503631412 | lr: 0.02 | momentum: 0.9
* Epoch (12/15) 
train (13/15) loss: 0.9014448404312133 | loss/mean: 0.9014448404312133 | loss/std: 0.10298571958588248 | lr: 0.02 | momentum: 0.9
valid (13/15) loss: 0.8514448642730713 | loss/mean: 0.8514448642730713 | loss/std: 0.20203054503631412 | lr: 0.02 | momentum: 0.9
* Epoch (13/15) 
train (14/15) loss: 0.9014448463916779 | loss/mean: 0.9014448463916779 | loss/std: 0.10298571666282283 | lr: 0.02 | momentum: 0.9
valid (14/15) loss: 0.8514448642730713 | loss/mean: 0.8514448642730713 | loss/std: 0.20203054503631412 | lr: 0.02 | momentum: 0.9
* Epoch (14/15) 
train (15/15) loss: 0.9014448463916779 | loss/mean: 0.9014448463916779 | loss/std: 0.10298571666282283 | lr: 0.02 | momentum: 0.9
valid (15/15) loss: 0.8514448642730713 

train (12/15) loss: 0.9014447987079621 | loss/mean: 0.9014447987079621 | loss/std: 0.10298572250896403 | lr: 0.02 | momentum: 0.9
valid (12/15) loss: 0.8514448642730713 | loss/mean: 0.8514448642730713 | loss/std: 0.20203054503631412 | lr: 0.02 | momentum: 0.9
* Epoch (12/15) 
train (13/15) loss: 0.9014448404312133 | loss/mean: 0.9014448404312133 | loss/std: 0.10298571958588248 | lr: 0.02 | momentum: 0.9
valid (13/15) loss: 0.8514448642730713 | loss/mean: 0.8514448642730713 | loss/std: 0.20203054503631412 | lr: 0.02 | momentum: 0.9
* Epoch (13/15) 
train (14/15) loss: 0.9014448463916779 | loss/mean: 0.9014448463916779 | loss/std: 0.10298571666282283 | lr: 0.02 | momentum: 0.9
valid (14/15) loss: 0.8514448642730713 | loss/mean: 0.8514448642730713 | loss/std: 0.20203054503631412 | lr: 0.02 | momentum: 0.9
* Epoch (14/15) 
train (15/15) loss: 0.9014448463916779 | loss/mean: 0.9014448463916779 | loss/std: 0.10298571666282283 | lr: 0.02 | momentum: 0.9
valid (15/15) loss: 0.8514448642730713 

valid (11/15) loss: 0.6523320317268372 | loss/mean: 0.6523320317268372 | loss/std: 0.045905796285609105 | lr: 0.02 | momentum: 0.9
* Epoch (11/15) 
train (12/15) loss: 0.6817568242549896 | loss/mean: 0.6817568242549896 | loss/std: 0.0683978859647994 | lr: 0.02 | momentum: 0.9
valid (12/15) loss: 0.6236732840538025 | loss/mean: 0.6236732840538025 | loss/std: 0.0558907142157434 | lr: 0.02 | momentum: 0.9
* Epoch (12/15) 
train (13/15) loss: 0.5953488171100616 | loss/mean: 0.5953488171100616 | loss/std: 0.0349694845195809 | lr: 0.02 | momentum: 0.9
valid (13/15) loss: 0.5757008790969849 | loss/mean: 0.5757008790969849 | loss/std: 0.040849613313246345 | lr: 0.02 | momentum: 0.9
* Epoch (13/15) 
train (14/15) loss: 0.614537227153778 | loss/mean: 0.614537227153778 | loss/std: 0.06418196900052171 | lr: 0.02 | momentum: 0.9
valid (14/15) loss: 0.5961841464042663 | loss/mean: 0.5961841464042663 | loss/std: 0.07761944757585724 | lr: 0.02 | momentum: 0.9
* Epoch (14/15) 
train (15/15) loss: 0.727

train (11/15) loss: 0.688029032945633 | loss/mean: 0.688029032945633 | loss/std: 0.06363611164205668 | lr: 0.02 | momentum: 0.9
valid (11/15) loss: 0.6523320317268372 | loss/mean: 0.6523320317268372 | loss/std: 0.045905796285609105 | lr: 0.02 | momentum: 0.9
* Epoch (11/15) 
train (12/15) loss: 0.6817568242549896 | loss/mean: 0.6817568242549896 | loss/std: 0.0683978859647994 | lr: 0.02 | momentum: 0.9
valid (12/15) loss: 0.6236732840538025 | loss/mean: 0.6236732840538025 | loss/std: 0.0558907142157434 | lr: 0.02 | momentum: 0.9
* Epoch (12/15) 
train (13/15) loss: 0.5953488171100616 | loss/mean: 0.5953488171100616 | loss/std: 0.0349694845195809 | lr: 0.02 | momentum: 0.9
valid (13/15) loss: 0.5757008790969849 | loss/mean: 0.5757008790969849 | loss/std: 0.040849613313246345 | lr: 0.02 | momentum: 0.9
* Epoch (13/15) 
train (14/15) loss: 0.614537227153778 | loss/mean: 0.614537227153778 | loss/std: 0.06418196900052171 | lr: 0.02 | momentum: 0.9
valid (14/15) loss: 0.5961841464042663 | los

valid (10/15) loss: 0.5969588994979859 | loss/mean: 0.5969588994979859 | loss/std: 0.040216777598152384 | lr: 0.02 | momentum: 0.9
* Epoch (10/15) 
train (11/15) loss: 0.5791823625564575 | loss/mean: 0.5791823625564575 | loss/std: 0.04386459333838349 | lr: 0.02 | momentum: 0.9
valid (11/15) loss: 0.576793658733368 | loss/mean: 0.576793658733368 | loss/std: 0.04045428737245389 | lr: 0.02 | momentum: 0.9
* Epoch (11/15) 
train (12/15) loss: 0.5929615914821624 | loss/mean: 0.5929615914821624 | loss/std: 0.0621626970952478 | lr: 0.02 | momentum: 0.9
valid (12/15) loss: 0.5917900562286377 | loss/mean: 0.5917900562286377 | loss/std: 0.04186317043347243 | lr: 0.02 | momentum: 0.9
* Epoch (12/15) 
train (13/15) loss: 0.6120904862880707 | loss/mean: 0.6120904862880707 | loss/std: 0.056310933991303457 | lr: 0.02 | momentum: 0.9
valid (13/15) loss: 0.5779412865638733 | loss/mean: 0.5779412865638733 | loss/std: 0.039233301602376286 | lr: 0.02 | momentum: 0.9
* Epoch (13/15) 
train (14/15) loss: 0.

train (10/15) loss: 0.9014448702335357 | loss/mean: 0.9014448702335357 | loss/std: 0.1029857517395451 | lr: 0.02 | momentum: 0.9
valid (10/15) loss: 0.8514448642730713 | loss/mean: 0.8514448642730713 | loss/std: 0.20203054503631412 | lr: 0.02 | momentum: 0.9
* Epoch (10/15) 
train (11/15) loss: 0.9015406370162964 | loss/mean: 0.9015406370162964 | loss/std: 0.10293914592143158 | lr: 0.02 | momentum: 0.9
valid (11/15) loss: 0.8514448642730713 | loss/mean: 0.8514448642730713 | loss/std: 0.20203054503631412 | lr: 0.02 | momentum: 0.9
* Epoch (11/15) 
train (12/15) loss: 0.9014545500278472 | loss/mean: 0.9014545500278472 | loss/std: 0.1029810083706775 | lr: 0.02 | momentum: 0.9
valid (12/15) loss: 0.8514448642730713 | loss/mean: 0.8514448642730713 | loss/std: 0.20203054503631412 | lr: 0.02 | momentum: 0.9
* Epoch (12/15) 
train (13/15) loss: 0.9014459073543548 | loss/mean: 0.9014459073543548 | loss/std: 0.10298524316422003 | lr: 0.02 | momentum: 0.9
valid (13/15) loss: 0.8514448642730713 | 

train (10/15) loss: 0.9014448702335357 | loss/mean: 0.9014448702335357 | loss/std: 0.1029857517395451 | lr: 0.02 | momentum: 0.9
valid (10/15) loss: 0.8514448642730713 | loss/mean: 0.8514448642730713 | loss/std: 0.20203054503631412 | lr: 0.02 | momentum: 0.9
* Epoch (10/15) 
train (11/15) loss: 0.9015406370162964 | loss/mean: 0.9015406370162964 | loss/std: 0.10293914592143158 | lr: 0.02 | momentum: 0.9
valid (11/15) loss: 0.8514448642730713 | loss/mean: 0.8514448642730713 | loss/std: 0.20203054503631412 | lr: 0.02 | momentum: 0.9
* Epoch (11/15) 
train (12/15) loss: 0.9014545500278472 | loss/mean: 0.9014545500278472 | loss/std: 0.1029810083706775 | lr: 0.02 | momentum: 0.9
valid (12/15) loss: 0.8514448642730713 | loss/mean: 0.8514448642730713 | loss/std: 0.20203054503631412 | lr: 0.02 | momentum: 0.9
* Epoch (12/15) 
train (13/15) loss: 0.9014459073543548 | loss/mean: 0.9014459073543548 | loss/std: 0.10298524316422003 | lr: 0.02 | momentum: 0.9
valid (13/15) loss: 0.8514448642730713 | 

valid (9/15) loss: 0.6614918947219849 | loss/mean: 0.6614918947219849 | loss/std: 0.08072895253632881 | lr: 0.02 | momentum: 0.9
* Epoch (9/15) 
train (10/15) loss: 0.6224737048149109 | loss/mean: 0.6224737048149109 | loss/std: 0.06784951023072174 | lr: 0.02 | momentum: 0.9
valid (10/15) loss: 0.580428957939148 | loss/mean: 0.580428957939148 | loss/std: 0.03624095531618193 | lr: 0.02 | momentum: 0.9
* Epoch (10/15) 
train (11/15) loss: 0.7055221796035767 | loss/mean: 0.7055221796035767 | loss/std: 0.11134162364213634 | lr: 0.02 | momentum: 0.9
valid (11/15) loss: 0.7033332109451294 | loss/mean: 0.7033332109451294 | loss/std: 0.10073068057547739 | lr: 0.02 | momentum: 0.9
* Epoch (11/15) 
train (12/15) loss: 0.6277777791023256 | loss/mean: 0.6277777791023256 | loss/std: 0.07606786129187978 | lr: 0.02 | momentum: 0.9
valid (12/15) loss: 0.579463517665863 | loss/mean: 0.579463517665863 | loss/std: 0.0250530214453485 | lr: 0.02 | momentum: 0.9
* Epoch (12/15) 
train (13/15) loss: 0.7078388

train (9/15) loss: 0.5993421852588654 | loss/mean: 0.5993421852588654 | loss/std: 0.03270640929560622 | lr: 0.02 | momentum: 0.9
valid (9/15) loss: 0.586439597606659 | loss/mean: 0.586439597606659 | loss/std: 0.036520199322429954 | lr: 0.02 | momentum: 0.9
* Epoch (9/15) 
train (10/15) loss: 0.6210965812206268 | loss/mean: 0.6210965812206268 | loss/std: 0.06314107321455356 | lr: 0.02 | momentum: 0.9
valid (10/15) loss: 0.595153522491455 | loss/mean: 0.595153522491455 | loss/std: 0.03381170316840597 | lr: 0.02 | momentum: 0.9
* Epoch (10/15) 
train (11/15) loss: 0.5889570593833924 | loss/mean: 0.5889570593833924 | loss/std: 0.028257160159708226 | lr: 0.02 | momentum: 0.9
valid (11/15) loss: 0.5831400275230407 | loss/mean: 0.5831400275230407 | loss/std: 0.039372632040390945 | lr: 0.02 | momentum: 0.9
* Epoch (11/15) 
train (12/15) loss: 0.6105443596839905 | loss/mean: 0.6105443596839905 | loss/std: 0.061375878942943836 | lr: 0.02 | momentum: 0.9
valid (12/15) loss: 0.591755723953247 | lo

valid (8/15) loss: 0.8036393761634827 | loss/mean: 0.8036393761634827 | loss/std: 0.07516062535311253 | lr: 0.02 | momentum: 0.9
* Epoch (8/15) 
train (9/15) loss: 0.8334066927433015 | loss/mean: 0.8334066927433015 | loss/std: 0.06040886159537438 | lr: 0.02 | momentum: 0.9
valid (9/15) loss: 0.8037634015083313 | loss/mean: 0.8037634015083313 | loss/std: 0.07490621437111569 | lr: 0.02 | momentum: 0.9
* Epoch (9/15) 
train (10/15) loss: 0.8333717107772827 | loss/mean: 0.8333717107772827 | loss/std: 0.06078293869128232 | lr: 0.02 | momentum: 0.9
valid (10/15) loss: 0.8038619518280029 | loss/mean: 0.8038619518280029 | loss/std: 0.0747453636024222 | lr: 0.02 | momentum: 0.9
* Epoch (10/15) 
train (11/15) loss: 0.8332191526889802 | loss/mean: 0.8332191526889802 | loss/std: 0.060810270038215986 | lr: 0.02 | momentum: 0.9
valid (11/15) loss: 0.8037947177886963 | loss/mean: 0.8037947177886963 | loss/std: 0.0748528218447276 | lr: 0.02 | momentum: 0.9
* Epoch (11/15) 
train (12/15) loss: 0.833243

* Epoch (7/15) 
train (8/15) loss: 0.8330304563045501 | loss/mean: 0.8330304563045501 | loss/std: 0.06086424634133397 | lr: 0.02 | momentum: 0.9
valid (8/15) loss: 0.8036393761634827 | loss/mean: 0.8036393761634827 | loss/std: 0.07516062535311253 | lr: 0.02 | momentum: 0.9
* Epoch (8/15) 
train (9/15) loss: 0.8334066927433015 | loss/mean: 0.8334066927433015 | loss/std: 0.06040886159537438 | lr: 0.02 | momentum: 0.9
valid (9/15) loss: 0.8037634015083313 | loss/mean: 0.8037634015083313 | loss/std: 0.07490621437111569 | lr: 0.02 | momentum: 0.9
* Epoch (9/15) 
train (10/15) loss: 0.8333717107772827 | loss/mean: 0.8333717107772827 | loss/std: 0.06078293869128232 | lr: 0.02 | momentum: 0.9
valid (10/15) loss: 0.8038619518280029 | loss/mean: 0.8038619518280029 | loss/std: 0.0747453636024222 | lr: 0.02 | momentum: 0.9
* Epoch (10/15) 
train (11/15) loss: 0.8332191526889802 | loss/mean: 0.8332191526889802 | loss/std: 0.060810270038215986 | lr: 0.02 | momentum: 0.9
valid (11/15) loss: 0.8037947

valid (7/15) loss: 0.804017186164856 | loss/mean: 0.804017186164856 | loss/std: 0.07453316280871422 | lr: 0.02 | momentum: 0.9
* Epoch (7/15) 
train (8/15) loss: 0.8330304563045501 | loss/mean: 0.8330304563045501 | loss/std: 0.06086424634133397 | lr: 0.02 | momentum: 0.9
valid (8/15) loss: 0.8036393761634827 | loss/mean: 0.8036393761634827 | loss/std: 0.07516062535311253 | lr: 0.02 | momentum: 0.9
* Epoch (8/15) 
train (9/15) loss: 0.8334066927433015 | loss/mean: 0.8334066927433015 | loss/std: 0.06040886159537438 | lr: 0.02 | momentum: 0.9
valid (9/15) loss: 0.8037634015083313 | loss/mean: 0.8037634015083313 | loss/std: 0.07490621437111569 | lr: 0.02 | momentum: 0.9
* Epoch (9/15) 
train (10/15) loss: 0.8333717107772827 | loss/mean: 0.8333717107772827 | loss/std: 0.06078293869128232 | lr: 0.02 | momentum: 0.9
valid (10/15) loss: 0.8038619518280029 | loss/mean: 0.8038619518280029 | loss/std: 0.0747453636024222 | lr: 0.02 | momentum: 0.9
* Epoch (10/15) 
train (11/15) loss: 0.83321915268

train (7/15) loss: 0.6857902109622955 | loss/mean: 0.6857902109622955 | loss/std: 0.08444213709832855 | lr: 0.02 | momentum: 0.9
valid (7/15) loss: 0.6345863103866577 | loss/mean: 0.6345863103866577 | loss/std: 0.07607622483120761 | lr: 0.02 | momentum: 0.9
* Epoch (7/15) 
train (8/15) loss: 0.620855450630188 | loss/mean: 0.620855450630188 | loss/std: 0.04749796153834859 | lr: 0.02 | momentum: 0.9
valid (8/15) loss: 0.5766020894050599 | loss/mean: 0.5766020894050599 | loss/std: 0.030144597041614374 | lr: 0.02 | momentum: 0.9
* Epoch (8/15) 
train (9/15) loss: 0.617849725484848 | loss/mean: 0.617849725484848 | loss/std: 0.0674187833775642 | lr: 0.02 | momentum: 0.9
valid (9/15) loss: 0.5752262473106384 | loss/mean: 0.5752262473106384 | loss/std: 0.04009148561399324 | lr: 0.02 | momentum: 0.9
* Epoch (9/15) 
train (10/15) loss: 0.7731736183166503 | loss/mean: 0.7731736183166503 | loss/std: 0.14072960565928835 | lr: 0.02 | momentum: 0.9
valid (10/15) loss: 0.8494810461997986 | loss/mean: 

valid (6/15) loss: 0.5728770852088928 | loss/mean: 0.5728770852088928 | loss/std: 0.04172423129191837 | lr: 0.02 | momentum: 0.9
* Epoch (6/15) 
train (7/15) loss: 0.8137210726737977 | loss/mean: 0.8137210726737977 | loss/std: 0.20950958145838344 | lr: 0.02 | momentum: 0.9
valid (7/15) loss: 0.8768322229385376 | loss/mean: 0.8768322229385376 | loss/std: 0.17183730843609452 | lr: 0.02 | momentum: 0.9
* Epoch (7/15) 
train (8/15) loss: 0.8976805806159973 | loss/mean: 0.8976805806159973 | loss/std: 0.13550972597638958 | lr: 0.02 | momentum: 0.9
valid (8/15) loss: 0.871444308757782 | loss/mean: 0.871444308757782 | loss/std: 0.17379364854776783 | lr: 0.02 | momentum: 0.9
* Epoch (8/15) 
train (9/15) loss: 0.8914569139480591 | loss/mean: 0.8914569139480591 | loss/std: 0.13632659765763092 | lr: 0.02 | momentum: 0.9
valid (9/15) loss: 0.8714548826217652 | loss/mean: 0.8714548826217652 | loss/std: 0.17378888675019288 | lr: 0.02 | momentum: 0.9
* Epoch (9/15) 
train (10/15) loss: 0.8915725290775

train (6/15) loss: 0.5976035594940186 | loss/mean: 0.5976035594940186 | loss/std: 0.06141233225734943 | lr: 0.02 | momentum: 0.9
valid (6/15) loss: 0.5728770852088928 | loss/mean: 0.5728770852088928 | loss/std: 0.04172423129191837 | lr: 0.02 | momentum: 0.9
* Epoch (6/15) 
train (7/15) loss: 0.8137210726737977 | loss/mean: 0.8137210726737977 | loss/std: 0.20950958145838344 | lr: 0.02 | momentum: 0.9
valid (7/15) loss: 0.8768322229385376 | loss/mean: 0.8768322229385376 | loss/std: 0.17183730843609452 | lr: 0.02 | momentum: 0.9
* Epoch (7/15) 
train (8/15) loss: 0.8976805806159973 | loss/mean: 0.8976805806159973 | loss/std: 0.13550972597638958 | lr: 0.02 | momentum: 0.9
valid (8/15) loss: 0.871444308757782 | loss/mean: 0.871444308757782 | loss/std: 0.17379364854776783 | lr: 0.02 | momentum: 0.9
* Epoch (8/15) 
train (9/15) loss: 0.8914569139480591 | loss/mean: 0.8914569139480591 | loss/std: 0.13632659765763092 | lr: 0.02 | momentum: 0.9
valid (9/15) loss: 0.8714548826217652 | loss/mean: 

valid (5/15) loss: 0.588910698890686 | loss/mean: 0.588910698890686 | loss/std: 0.0407169677482757 | lr: 0.02 | momentum: 0.9
* Epoch (5/15) 
train (6/15) loss: 0.6689273178577423 | loss/mean: 0.6689273178577423 | loss/std: 0.08311622124064778 | lr: 0.02 | momentum: 0.9
valid (6/15) loss: 0.5861554861068725 | loss/mean: 0.5861554861068725 | loss/std: 0.031118682335731947 | lr: 0.02 | momentum: 0.9
* Epoch (6/15) 
train (7/15) loss: 0.5996321856975556 | loss/mean: 0.5996321856975556 | loss/std: 0.0539246180999263 | lr: 0.02 | momentum: 0.9
valid (7/15) loss: 0.5902156352996826 | loss/mean: 0.5902156352996826 | loss/std: 0.05281525095333641 | lr: 0.02 | momentum: 0.9
* Epoch (7/15) 
train (8/15) loss: 0.622950941324234 | loss/mean: 0.622950941324234 | loss/std: 0.07055281566415582 | lr: 0.02 | momentum: 0.9
valid (8/15) loss: 0.5944953083992004 | loss/mean: 0.5944953083992004 | loss/std: 0.041056684978879036 | lr: 0.02 | momentum: 0.9
* Epoch (8/15) 
train (9/15) loss: 0.5888179004192352

train (5/15) loss: 0.6141265988349914 | loss/mean: 0.6141265988349914 | loss/std: 0.05706541567705562 | lr: 0.02 | momentum: 0.9
valid (5/15) loss: 0.6665913343429566 | loss/mean: 0.6665913343429566 | loss/std: 0.0933607717965107 | lr: 0.02 | momentum: 0.9
* Epoch (5/15) 
train (6/15) loss: 0.645551884174347 | loss/mean: 0.645551884174347 | loss/std: 0.059259277898426846 | lr: 0.02 | momentum: 0.9
valid (6/15) loss: 0.5795424342155456 | loss/mean: 0.5795424342155456 | loss/std: 0.035596185666136995 | lr: 0.02 | momentum: 0.9
* Epoch (6/15) 
train (7/15) loss: 0.6709331154823303 | loss/mean: 0.6709331154823303 | loss/std: 0.08911546668225617 | lr: 0.02 | momentum: 0.9
valid (7/15) loss: 0.5766360998153687 | loss/mean: 0.5766360998153687 | loss/std: 0.02854587825206969 | lr: 0.02 | momentum: 0.9
* Epoch (7/15) 
train (8/15) loss: 0.6018508791923524 | loss/mean: 0.6018508791923524 | loss/std: 0.057576236490062015 | lr: 0.02 | momentum: 0.9
valid (8/15) loss: 0.5763245105743409 | loss/mean

valid (4/15) loss: 1.0996476888656617 | loss/mean: 1.0996476888656617 | loss/std: 0.004312442893218265 | lr: 0.02 | momentum: 0.9
* Epoch (4/15) 
train (5/15) loss: 1.097939598560333 | loss/mean: 1.097939598560333 | loss/std: 0.003542833062691383 | lr: 0.02 | momentum: 0.9
valid (5/15) loss: 1.100527000427246 | loss/mean: 1.100527000427246 | loss/std: 0.007447273950209362 | lr: 0.02 | momentum: 0.9
* Epoch (5/15) 
train (6/15) loss: 1.0977449893951414 | loss/mean: 1.0977449893951414 | loss/std: 0.005433076270324424 | lr: 0.02 | momentum: 0.9
valid (6/15) loss: 1.1012794733047486 | loss/mean: 1.1012794733047486 | loss/std: 0.009973161786523266 | lr: 0.02 | momentum: 0.9
* Epoch (6/15) 
train (7/15) loss: 1.0976237773895263 | loss/mean: 1.0976237773895263 | loss/std: 0.0069872517113755825 | lr: 0.02 | momentum: 0.9
valid (7/15) loss: 1.101910662651062 | loss/mean: 1.101910662651062 | loss/std: 0.011986331203003764 | lr: 0.02 | momentum: 0.9
* Epoch (7/15) 
train (8/15) loss: 1.0975480794

train (4/15) loss: 1.098251175880432 | loss/mean: 1.098251175880432 | loss/std: 0.0014006761669585054 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 1.0996476888656617 | loss/mean: 1.0996476888656617 | loss/std: 0.004312442893218265 | lr: 0.02 | momentum: 0.9
* Epoch (4/15) 
train (5/15) loss: 1.097939598560333 | loss/mean: 1.097939598560333 | loss/std: 0.003542833062691383 | lr: 0.02 | momentum: 0.9
valid (5/15) loss: 1.100527000427246 | loss/mean: 1.100527000427246 | loss/std: 0.007447273950209362 | lr: 0.02 | momentum: 0.9
* Epoch (5/15) 
train (6/15) loss: 1.0977449893951414 | loss/mean: 1.0977449893951414 | loss/std: 0.005433076270324424 | lr: 0.02 | momentum: 0.9
valid (6/15) loss: 1.1012794733047486 | loss/mean: 1.1012794733047486 | loss/std: 0.009973161786523266 | lr: 0.02 | momentum: 0.9
* Epoch (6/15) 
train (7/15) loss: 1.0976237773895263 | loss/mean: 1.0976237773895263 | loss/std: 0.0069872517113755825 | lr: 0.02 | momentum: 0.9
valid (7/15) loss: 1.101910662651062 | loss/me

valid (3/15) loss: 0.9067992687225341 | loss/mean: 0.9067992687225341 | loss/std: 0.03244028968705349 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.8546930313110352 | loss/mean: 0.8546930313110352 | loss/std: 0.05789068005267876 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.7707686185836792 | loss/mean: 0.7707686185836792 | loss/std: 0.09783427227752084 | lr: 0.02 | momentum: 0.9
* Epoch (4/15) 
train (5/15) loss: 0.7200147628784178 | loss/mean: 0.7200147628784178 | loss/std: 0.05546726750548687 | lr: 0.02 | momentum: 0.9
valid (5/15) loss: 0.6693239808082581 | loss/mean: 0.6693239808082581 | loss/std: 0.09725858779808111 | lr: 0.02 | momentum: 0.9
* Epoch (5/15) 
train (6/15) loss: 0.6121078968048095 | loss/mean: 0.6121078968048095 | loss/std: 0.050600296653064855 | lr: 0.02 | momentum: 0.9
valid (6/15) loss: 0.585128664970398 | loss/mean: 0.585128664970398 | loss/std: 0.04753469560289783 | lr: 0.02 | momentum: 0.9
* Epoch (6/15) 
train (7/15) loss: 0.6087977170944

train (3/15) loss: 1.019672691822052 | loss/mean: 1.019672691822052 | loss/std: 0.04115781077770836 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.9067992687225341 | loss/mean: 0.9067992687225341 | loss/std: 0.03244028968705349 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.8546930313110352 | loss/mean: 0.8546930313110352 | loss/std: 0.05789068005267876 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.7707686185836792 | loss/mean: 0.7707686185836792 | loss/std: 0.09783427227752084 | lr: 0.02 | momentum: 0.9
* Epoch (4/15) 
train (5/15) loss: 0.7200147628784178 | loss/mean: 0.7200147628784178 | loss/std: 0.05546726750548687 | lr: 0.02 | momentum: 0.9
valid (5/15) loss: 0.6693239808082581 | loss/mean: 0.6693239808082581 | loss/std: 0.09725858779808111 | lr: 0.02 | momentum: 0.9
* Epoch (5/15) 
train (6/15) loss: 0.6121078968048095 | loss/mean: 0.6121078968048095 | loss/std: 0.050600296653064855 | lr: 0.02 | momentum: 0.9
valid (6/15) loss: 0.585128664970398 | loss/mean: 

train (3/15) loss: 0.7032306373119355 | loss/mean: 0.7032306373119355 | loss/std: 0.09359868943072389 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.6517342448234558 | loss/mean: 0.6517342448234558 | loss/std: 0.10172655269828823 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.7287745714187622 | loss/mean: 0.7287745714187622 | loss/std: 0.08598232709541877 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.7188589811325073 | loss/mean: 0.7188589811325073 | loss/std: 0.11530857722156256 | lr: 0.02 | momentum: 0.9
* Epoch (4/15) 
train (5/15) loss: 0.6753986477851869 | loss/mean: 0.6753986477851869 | loss/std: 0.09839530368946038 | lr: 0.02 | momentum: 0.9
valid (5/15) loss: 0.7347015857696533 | loss/mean: 0.7347015857696533 | loss/std: 0.12094815967710652 | lr: 0.02 | momentum: 0.9
* Epoch (5/15) 
train (6/15) loss: 0.8166434586048126 | loss/mean: 0.8166434586048126 | loss/std: 0.09866559187389784 | lr: 0.02 | momentum: 0.9
valid (6/15) loss: 0.7717166066169738 | loss/mean

train (3/15) loss: 0.7032306373119355 | loss/mean: 0.7032306373119355 | loss/std: 0.09359868943072389 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.6517342448234558 | loss/mean: 0.6517342448234558 | loss/std: 0.10172655269828823 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.7287745714187622 | loss/mean: 0.7287745714187622 | loss/std: 0.08598232709541877 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.7188589811325073 | loss/mean: 0.7188589811325073 | loss/std: 0.11530857722156256 | lr: 0.02 | momentum: 0.9
* Epoch (4/15) 
train (5/15) loss: 0.6753986477851869 | loss/mean: 0.6753986477851869 | loss/std: 0.09839530368946038 | lr: 0.02 | momentum: 0.9
valid (5/15) loss: 0.7347015857696533 | loss/mean: 0.7347015857696533 | loss/std: 0.12094815967710652 | lr: 0.02 | momentum: 0.9
* Epoch (5/15) 
train (6/15) loss: 0.8166434586048126 | loss/mean: 0.8166434586048126 | loss/std: 0.09866559187389784 | lr: 0.02 | momentum: 0.9
valid (6/15) loss: 0.7717166066169738 | loss/mean

* Epoch (2/15) 
train (3/15) loss: 0.8428152918815613 | loss/mean: 0.8428152918815613 | loss/std: 0.06670279090685896 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.7874375581741333 | loss/mean: 0.7874375581741333 | loss/std: 0.11698730738089635 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.8088427662849427 | loss/mean: 0.8088427662849427 | loss/std: 0.048516652298237574 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.7559465169906616 | loss/mean: 0.7559465169906616 | loss/std: 0.09389498633771316 | lr: 0.02 | momentum: 0.9
* Epoch (4/15) 
train (5/15) loss: 0.7560312867164611 | loss/mean: 0.7560312867164611 | loss/std: 0.042296658831995904 | lr: 0.02 | momentum: 0.9
valid (5/15) loss: 0.7298741817474366 | loss/mean: 0.7298741817474366 | loss/std: 0.11268863596852385 | lr: 0.02 | momentum: 0.9
* Epoch (5/15) 
train (6/15) loss: 0.6950486779212951 | loss/mean: 0.6950486779212951 | loss/std: 0.062173920071079455 | lr: 0.02 | momentum: 0.9
valid (6/15) loss: 0.688238525

valid (2/15) loss: 0.7985044956207276 | loss/mean: 0.7985044956207276 | loss/std: 0.11235824157990874 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.8428152918815613 | loss/mean: 0.8428152918815613 | loss/std: 0.06670279090685896 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.7874375581741333 | loss/mean: 0.7874375581741333 | loss/std: 0.11698730738089635 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.8088427662849427 | loss/mean: 0.8088427662849427 | loss/std: 0.048516652298237574 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.7559465169906616 | loss/mean: 0.7559465169906616 | loss/std: 0.09389498633771316 | lr: 0.02 | momentum: 0.9
* Epoch (4/15) 
train (5/15) loss: 0.7560312867164611 | loss/mean: 0.7560312867164611 | loss/std: 0.042296658831995904 | lr: 0.02 | momentum: 0.9
valid (5/15) loss: 0.7298741817474366 | loss/mean: 0.7298741817474366 | loss/std: 0.11268863596852385 | lr: 0.02 | momentum: 0.9
* Epoch (5/15) 
train (6/15) loss: 0.6950486779

train (2/15) loss: 0.8073497414588928 | loss/mean: 0.8073497414588928 | loss/std: 0.05923248761324301 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.7037274837493896 | loss/mean: 0.7037274837493896 | loss/std: 0.06252994818323433 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.6818123698234558 | loss/mean: 0.6818123698234558 | loss/std: 0.033457255350900085 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.6517322301864624 | loss/mean: 0.6517322301864624 | loss/std: 0.06565314187821775 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.6343222439289092 | loss/mean: 0.6343222439289092 | loss/std: 0.04696263741459684 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.6443768858909606 | loss/mean: 0.6443768858909606 | loss/std: 0.07961833133605575 | lr: 0.02 | momentum: 0.9
* Epoch (4/15) 
train (5/15) loss: 0.6278923392295838 | loss/mean: 0.6278923392295838 | loss/std: 0.04939400439427417 | lr: 0.02 | momentum: 0.9
valid (5/15) loss: 0.6403292894363404 | loss/mea

valid (1/15) loss: 0.7214526295661926 | loss/mean: 0.7214526295661926 | loss/std: 0.08488662101018349 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.667618829011917 | loss/mean: 0.667618829011917 | loss/std: 0.07517241960544731 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.5979053735733032 | loss/mean: 0.5979053735733032 | loss/std: 0.051020088032094214 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.6927381038665771 | loss/mean: 0.6927381038665771 | loss/std: 0.14895879145396512 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.6438682317733765 | loss/mean: 0.6438682317733765 | loss/std: 0.10429991308436162 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.6435474395751952 | loss/mean: 0.6435474395751952 | loss/std: 0.08203875668403118 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.8291697025299072 | loss/mean: 0.8291697025299072 | loss/std: 0.1047468818595764 | lr: 0.02 | momentum: 0.9
* Epoch (4/15) 
train (5/15) loss: 0.64901618957519

train (1/15) loss: 0.952274638414383 | loss/mean: 0.952274638414383 | loss/std: 0.13152026043833084 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 0.7214526295661926 | loss/mean: 0.7214526295661926 | loss/std: 0.08488662101018349 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.667618829011917 | loss/mean: 0.667618829011917 | loss/std: 0.07517241960544731 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.5979053735733032 | loss/mean: 0.5979053735733032 | loss/std: 0.051020088032094214 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.6927381038665771 | loss/mean: 0.6927381038665771 | loss/std: 0.14895879145396512 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.6438682317733765 | loss/mean: 0.6438682317733765 | loss/std: 0.10429991308436162 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.6435474395751952 | loss/mean: 0.6435474395751952 | loss/std: 0.08203875668403118 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.8291697025299072 | loss/mean: 0

train (1/15) loss: 0.9853062510490417 | loss/mean: 0.9853062510490417 | loss/std: 0.08853261940357163 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 0.7733149886131286 | loss/mean: 0.7733149886131286 | loss/std: 0.10028810064775455 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.7530283212661744 | loss/mean: 0.7530283212661744 | loss/std: 0.053703380494951235 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.6617521643638611 | loss/mean: 0.6617521643638611 | loss/std: 0.07640579150679297 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.6554184317588807 | loss/mean: 0.6554184317588807 | loss/std: 0.04444975881765937 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.6327598333358765 | loss/mean: 0.6327598333358765 | loss/std: 0.08059699993518527 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.5996226787567138 | loss/mean: 0.5996226787567138 | loss/std: 0.03573489529111414 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.5953715443611145 | loss/mea

train (1/15) loss: 0.9853062510490417 | loss/mean: 0.9853062510490417 | loss/std: 0.08853261940357163 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 0.7733149886131286 | loss/mean: 0.7733149886131286 | loss/std: 0.10028810064775455 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.7530283212661744 | loss/mean: 0.7530283212661744 | loss/std: 0.053703380494951235 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.6617521643638611 | loss/mean: 0.6617521643638611 | loss/std: 0.07640579150679297 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.6554184317588807 | loss/mean: 0.6554184317588807 | loss/std: 0.04444975881765937 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.6327598333358765 | loss/mean: 0.6327598333358765 | loss/std: 0.08059699993518527 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.5996226787567138 | loss/mean: 0.5996226787567138 | loss/std: 0.03573489529111414 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.5953715443611145 | loss/mea

* Epoch (15/15) 
Top best models:
logs/trial_82/train.2.pth	0.7626
train (1/15) loss: 1.0942625761032103 | loss/mean: 1.0942625761032103 | loss/std: 0.023276813383437962 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 0.9751927137374878 | loss/mean: 0.9751927137374878 | loss/std: 0.0601381028440425 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.8699765741825103 | loss/mean: 0.8699765741825103 | loss/std: 0.09390851144040327 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.7625882267951966 | loss/mean: 0.7625882267951966 | loss/std: 0.0715411572188168 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.9036904454231264 | loss/mean: 0.9036904454231264 | loss/std: 0.16602263527135827 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.871834683418274 | loss/mean: 0.871834683418274 | loss/std: 0.1735243254558177 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.8924678504467011 | loss/mean: 0.8924678504467011 | loss/std: 0.13603631424199206 | lr: 0.02 | 

train (1/15) loss: 1.0942625761032103 | loss/mean: 1.0942625761032103 | loss/std: 0.023276813383437962 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 0.9751927137374878 | loss/mean: 0.9751927137374878 | loss/std: 0.0601381028440425 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.8699765741825103 | loss/mean: 0.8699765741825103 | loss/std: 0.09390851144040327 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.7625882267951966 | loss/mean: 0.7625882267951966 | loss/std: 0.0715411572188168 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.9036904454231264 | loss/mean: 0.9036904454231264 | loss/std: 0.16602263527135827 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.871834683418274 | loss/mean: 0.871834683418274 | loss/std: 0.1735243254558177 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.8924678504467011 | loss/mean: 0.8924678504467011 | loss/std: 0.13603631424199206 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.8725369095802307 | loss/mean: 0.

* Epoch (15/15) 
Top best models:
logs/trial_86/train.2.pth	0.7626
train (1/15) loss: 1.0059491455554963 | loss/mean: 1.0059491455554963 | loss/std: 0.0924307185640475 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 0.8014093041419983 | loss/mean: 0.8014093041419983 | loss/std: 0.13363122039122446 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.8428043723106383 | loss/mean: 0.8428043723106383 | loss/std: 0.10605539203459131 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.7876577138900757 | loss/mean: 0.7876577138900757 | loss/std: 0.08316098888998374 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.7149894773960114 | loss/mean: 0.7149894773960114 | loss/std: 0.03692341831139288 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.6666159868240357 | loss/mean: 0.6666159868240357 | loss/std: 0.09665210816003601 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.647160279750824 | loss/mean: 0.647160279750824 | loss/std: 0.10161588702982453 | lr: 0.02 |

train (1/15) loss: 1.0059491455554963 | loss/mean: 1.0059491455554963 | loss/std: 0.0924307185640475 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 0.8014093041419983 | loss/mean: 0.8014093041419983 | loss/std: 0.13363122039122446 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.8428043723106383 | loss/mean: 0.8428043723106383 | loss/std: 0.10605539203459131 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.7876577138900757 | loss/mean: 0.7876577138900757 | loss/std: 0.08316098888998374 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.7149894773960114 | loss/mean: 0.7149894773960114 | loss/std: 0.03692341831139288 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.6666159868240357 | loss/mean: 0.6666159868240357 | loss/std: 0.09665210816003601 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.647160279750824 | loss/mean: 0.647160279750824 | loss/std: 0.10161588702982453 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.5859526991844177 | loss/mean: 0

* Epoch (15/15) 
Top best models:
logs/trial_90/train.4.pth	0.5860
train (1/15) loss: 0.9559536874294282 | loss/mean: 0.9559536874294282 | loss/std: 0.10912616060689136 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 0.7587308764457703 | loss/mean: 0.7587308764457703 | loss/std: 0.09540045416617803 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.7318225264549255 | loss/mean: 0.7318225264549255 | loss/std: 0.05906673116769949 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.6397962927818298 | loss/mean: 0.6397962927818298 | loss/std: 0.05715780050211655 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.6421280801296234 | loss/mean: 0.6421280801296234 | loss/std: 0.04834927139907733 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.6664407253265381 | loss/mean: 0.6664407253265381 | loss/std: 0.08790029832347782 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.6427670538425445 | loss/mean: 0.6427670538425445 | loss/std: 0.04814710746390248 | lr: 0.0

valid (15/15) loss: 0.5758029103279114 | loss/mean: 0.5758029103279114 | loss/std: 0.03607513158077087 | lr: 0.02 | momentum: 0.9
* Epoch (15/15) 
Top best models:
logs/trial_92/train.9.pth	0.5753
train (1/15) loss: 0.9722730159759521 | loss/mean: 0.9722730159759521 | loss/std: 0.10916155255618823 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 0.7248199224472046 | loss/mean: 0.7248199224472046 | loss/std: 0.062368597184641585 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.7747004568576813 | loss/mean: 0.7747004568576813 | loss/std: 0.08836214579586586 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.6211688399314881 | loss/mean: 0.6211688399314881 | loss/std: 0.04815897638508142 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.6524807393550873 | loss/mean: 0.6524807393550873 | loss/std: 0.09062377505199153 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.6382652759552002 | loss/mean: 0.6382652759552002 | loss/std: 0.04880225176398525 | lr: 0.02 | momentum: 

train (15/15) loss: 0.6866810083389283 | loss/mean: 0.6866810083389283 | loss/std: 0.04637764570557555 | lr: 0.02 | momentum: 0.9
valid (15/15) loss: 0.6430735468864441 | loss/mean: 0.6430735468864441 | loss/std: 0.08854572404273649 | lr: 0.02 | momentum: 0.9
* Epoch (15/15) 
Top best models:
logs/trial_94/train.13.pth	0.5695
train (1/15) loss: 0.9722730159759521 | loss/mean: 0.9722730159759521 | loss/std: 0.10916155255618823 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 0.7248199224472046 | loss/mean: 0.7248199224472046 | loss/std: 0.062368597184641585 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.7747004568576813 | loss/mean: 0.7747004568576813 | loss/std: 0.08836214579586586 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.6211688399314881 | loss/mean: 0.6211688399314881 | loss/std: 0.04815897638508142 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.6524807393550873 | loss/mean: 0.6524807393550873 | loss/std: 0.09062377505199153 | lr: 0.02 | momentum

valid (14/15) loss: 0.7781798958778381 | loss/mean: 0.7781798958778381 | loss/std: 0.06075474275772615 | lr: 0.02 | momentum: 0.9
* Epoch (14/15) 
train (15/15) loss: 0.7886781632900238 | loss/mean: 0.7886781632900238 | loss/std: 0.03547397468295875 | lr: 0.02 | momentum: 0.9
valid (15/15) loss: 0.7709504127502441 | loss/mean: 0.7709504127502441 | loss/std: 0.05930245627642896 | lr: 0.02 | momentum: 0.9
* Epoch (15/15) 
Top best models:
logs/trial_96/train.15.pth	0.7710
train (1/15) loss: 1.0543825030326843 | loss/mean: 1.0543825030326843 | loss/std: 0.05675654059940453 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 0.8708818197250366 | loss/mean: 0.8708818197250366 | loss/std: 0.09949205221881077 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.8694382190704346 | loss/mean: 0.8694382190704346 | loss/std: 0.05784288360655069 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.7623839259147644 | loss/mean: 0.7623839259147644 | loss/std: 0.06865516333498436 | lr: 0.02 | momentu

train (14/15) loss: 0.7658971667289735 | loss/mean: 0.7658971667289735 | loss/std: 0.08637601609803412 | lr: 0.02 | momentum: 0.9
valid (14/15) loss: 0.6069766640663147 | loss/mean: 0.6069766640663147 | loss/std: 0.05022515540481183 | lr: 0.02 | momentum: 0.9
* Epoch (14/15) 
train (15/15) loss: 0.6228995263576508 | loss/mean: 0.6228995263576508 | loss/std: 0.06629428896088702 | lr: 0.02 | momentum: 0.9
valid (15/15) loss: 0.5850900769233703 | loss/mean: 0.5850900769233703 | loss/std: 0.04060201492703492 | lr: 0.02 | momentum: 0.9
* Epoch (15/15) 
Top best models:
logs/trial_98/train.15.pth	0.5851
train (1/15) loss: 1.0543825030326843 | loss/mean: 1.0543825030326843 | loss/std: 0.05675654059940453 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 0.8708818197250366 | loss/mean: 0.8708818197250366 | loss/std: 0.09949205221881077 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.8694382190704346 | loss/mean: 0.8694382190704346 | loss/std: 0.05784288360655069 | lr: 0.02 | moment

valid (13/15) loss: 0.7408624410629272 | loss/mean: 0.7408624410629272 | loss/std: 0.10591088374687682 | lr: 0.02 | momentum: 0.9
* Epoch (13/15) 
train (14/15) loss: 0.7658971667289735 | loss/mean: 0.7658971667289735 | loss/std: 0.08637601609803412 | lr: 0.02 | momentum: 0.9
valid (14/15) loss: 0.6069766640663147 | loss/mean: 0.6069766640663147 | loss/std: 0.05022515540481183 | lr: 0.02 | momentum: 0.9
* Epoch (14/15) 
train (15/15) loss: 0.6228995263576508 | loss/mean: 0.6228995263576508 | loss/std: 0.06629428896088702 | lr: 0.02 | momentum: 0.9
valid (15/15) loss: 0.5850900769233703 | loss/mean: 0.5850900769233703 | loss/std: 0.04060201492703492 | lr: 0.02 | momentum: 0.9
* Epoch (15/15) 
Top best models:
logs/trial_100/train.15.pth	0.5851
train (1/15) loss: 1.0983984470367432 | loss/mean: 1.0983984470367432 | loss/std: 0.01804482762775491 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 1.082124161720276 | loss/mean: 1.082124161720276 | loss/std: 0.015052284947255586 | lr: 0.02 | mome

* Epoch (12/15) 
train (13/15) loss: 0.65016930103302 | loss/mean: 0.65016930103302 | loss/std: 0.10086828039719903 | lr: 0.02 | momentum: 0.9
valid (13/15) loss: 0.6377093553543091 | loss/mean: 0.6377093553543091 | loss/std: 0.08346774496077516 | lr: 0.02 | momentum: 0.9
* Epoch (13/15) 
train (14/15) loss: 0.6033185064792633 | loss/mean: 0.6033185064792633 | loss/std: 0.042374686181641134 | lr: 0.02 | momentum: 0.9
valid (14/15) loss: 0.5747979521751404 | loss/mean: 0.5747979521751404 | loss/std: 0.03976983426528879 | lr: 0.02 | momentum: 0.9
* Epoch (14/15) 
train (15/15) loss: 0.6169129967689514 | loss/mean: 0.6169129967689514 | loss/std: 0.07711259794803109 | lr: 0.02 | momentum: 0.9
valid (15/15) loss: 0.5889699935913086 | loss/mean: 0.5889699935913086 | loss/std: 0.03417748088086927 | lr: 0.02 | momentum: 0.9
* Epoch (15/15) 
Top best models:
logs/trial_102/train.14.pth	0.5748
train (1/15) loss: 1.0983984470367432 | loss/mean: 1.0983984470367432 | loss/std: 0.01804482762775491 |

valid (12/15) loss: 0.5939939498901368 | loss/mean: 0.5939939498901368 | loss/std: 0.038445901391211436 | lr: 0.02 | momentum: 0.9
* Epoch (12/15) 
train (13/15) loss: 0.65016930103302 | loss/mean: 0.65016930103302 | loss/std: 0.10086828039719903 | lr: 0.02 | momentum: 0.9
valid (13/15) loss: 0.6377093553543091 | loss/mean: 0.6377093553543091 | loss/std: 0.08346774496077516 | lr: 0.02 | momentum: 0.9
* Epoch (13/15) 
train (14/15) loss: 0.6033185064792633 | loss/mean: 0.6033185064792633 | loss/std: 0.042374686181641134 | lr: 0.02 | momentum: 0.9
valid (14/15) loss: 0.5747979521751404 | loss/mean: 0.5747979521751404 | loss/std: 0.03976983426528879 | lr: 0.02 | momentum: 0.9
* Epoch (14/15) 
train (15/15) loss: 0.6169129967689514 | loss/mean: 0.6169129967689514 | loss/std: 0.07711259794803109 | lr: 0.02 | momentum: 0.9
valid (15/15) loss: 0.5889699935913086 | loss/mean: 0.5889699935913086 | loss/std: 0.03417748088086927 | lr: 0.02 | momentum: 0.9
* Epoch (15/15) 
Top best models:
logs/tr

train (12/15) loss: 0.6119095385074615 | loss/mean: 0.6119095385074615 | loss/std: 0.06857463061523704 | lr: 0.02 | momentum: 0.9
valid (12/15) loss: 0.5883600234985351 | loss/mean: 0.5883600234985351 | loss/std: 0.031785285512692704 | lr: 0.02 | momentum: 0.9
* Epoch (12/15) 
train (13/15) loss: 0.5781044900417327 | loss/mean: 0.5781044900417327 | loss/std: 0.033941876585082215 | lr: 0.02 | momentum: 0.9
valid (13/15) loss: 0.5767422556877136 | loss/mean: 0.5767422556877136 | loss/std: 0.040743415778664166 | lr: 0.02 | momentum: 0.9
* Epoch (13/15) 
train (14/15) loss: 0.595762151479721 | loss/mean: 0.595762151479721 | loss/std: 0.06136546832793056 | lr: 0.02 | momentum: 0.9
valid (14/15) loss: 0.5863414168357849 | loss/mean: 0.5863414168357849 | loss/std: 0.0320346221530689 | lr: 0.02 | momentum: 0.9
* Epoch (14/15) 
train (15/15) loss: 0.5973287999629975 | loss/mean: 0.5973287999629975 | loss/std: 0.03814160511972345 | lr: 0.02 | momentum: 0.9
valid (15/15) loss: 0.5775201201438904 

valid (11/15) loss: 0.5891751885414124 | loss/mean: 0.5891751885414124 | loss/std: 0.036471179003697625 | lr: 0.02 | momentum: 0.9
* Epoch (11/15) 
train (12/15) loss: 0.5809834837913513 | loss/mean: 0.5809834837913513 | loss/std: 0.03907429464727779 | lr: 0.02 | momentum: 0.9
valid (12/15) loss: 0.5793206214904785 | loss/mean: 0.5793206214904785 | loss/std: 0.04026283937653754 | lr: 0.02 | momentum: 0.9
* Epoch (12/15) 
train (13/15) loss: 0.6085160493850709 | loss/mean: 0.6085160493850709 | loss/std: 0.06595443279761418 | lr: 0.02 | momentum: 0.9
valid (13/15) loss: 0.5864811420440674 | loss/mean: 0.5864811420440674 | loss/std: 0.0386465517672467 | lr: 0.02 | momentum: 0.9
* Epoch (13/15) 
train (14/15) loss: 0.5802724301815032 | loss/mean: 0.5802724301815032 | loss/std: 0.04736034121828588 | lr: 0.02 | momentum: 0.9
valid (14/15) loss: 0.5772729992866517 | loss/mean: 0.5772729992866517 | loss/std: 0.04200561937908476 | lr: 0.02 | momentum: 0.9
* Epoch (14/15) 
train (15/15) loss: 0.

train (11/15) loss: 0.6182298302650451 | loss/mean: 0.6182298302650451 | loss/std: 0.03886311135176334 | lr: 0.02 | momentum: 0.9
valid (11/15) loss: 0.6029221057891846 | loss/mean: 0.6029221057891846 | loss/std: 0.034001449495426196 | lr: 0.02 | momentum: 0.9
* Epoch (11/15) 
train (12/15) loss: 0.61439009308815 | loss/mean: 0.61439009308815 | loss/std: 0.03871971990939054 | lr: 0.02 | momentum: 0.9
valid (12/15) loss: 0.5999114036560058 | loss/mean: 0.5999114036560058 | loss/std: 0.03321840693023734 | lr: 0.02 | momentum: 0.9
* Epoch (12/15) 
train (13/15) loss: 0.6109968364238739 | loss/mean: 0.6109968364238739 | loss/std: 0.038333074600926574 | lr: 0.02 | momentum: 0.9
valid (13/15) loss: 0.5974763989448547 | loss/mean: 0.5974763989448547 | loss/std: 0.03262802740852881 | lr: 0.02 | momentum: 0.9
* Epoch (13/15) 
train (14/15) loss: 0.6083979964256286 | loss/mean: 0.6083979964256286 | loss/std: 0.0383891559937522 | lr: 0.02 | momentum: 0.9
valid (14/15) loss: 0.5955967307090759 | l

valid (10/15) loss: 0.590432858467102 | loss/mean: 0.590432858467102 | loss/std: 0.03845514974654346 | lr: 0.02 | momentum: 0.9
* Epoch (10/15) 
train (11/15) loss: 0.5797404825687408 | loss/mean: 0.5797404825687408 | loss/std: 0.03815722663666372 | lr: 0.02 | momentum: 0.9
valid (11/15) loss: 0.5763253331184387 | loss/mean: 0.5763253331184387 | loss/std: 0.041779684119610805 | lr: 0.02 | momentum: 0.9
* Epoch (11/15) 
train (12/15) loss: 0.6044330716133117 | loss/mean: 0.6044330716133117 | loss/std: 0.061359446314237044 | lr: 0.02 | momentum: 0.9
valid (12/15) loss: 0.5882058382034302 | loss/mean: 0.5882058382034302 | loss/std: 0.03792192502499626 | lr: 0.02 | momentum: 0.9
* Epoch (12/15) 
train (13/15) loss: 0.6018344581127166 | loss/mean: 0.6018344581127166 | loss/std: 0.053969562985620785 | lr: 0.02 | momentum: 0.9
valid (13/15) loss: 0.5779017806053162 | loss/mean: 0.5779017806053162 | loss/std: 0.03846559372488683 | lr: 0.02 | momentum: 0.9
* Epoch (13/15) 
train (14/15) loss: 0

train (10/15) loss: 0.6614581763744355 | loss/mean: 0.6614581763744355 | loss/std: 0.07863462102037609 | lr: 0.02 | momentum: 0.9
valid (10/15) loss: 0.6017969965934753 | loss/mean: 0.6017969965934753 | loss/std: 0.04961737308676473 | lr: 0.02 | momentum: 0.9
* Epoch (10/15) 
train (11/15) loss: 0.6047964811325073 | loss/mean: 0.6047964811325073 | loss/std: 0.053836304677360516 | lr: 0.02 | momentum: 0.9
valid (11/15) loss: 0.5842337131500244 | loss/mean: 0.5842337131500244 | loss/std: 0.037322183113070805 | lr: 0.02 | momentum: 0.9
* Epoch (11/15) 
train (12/15) loss: 0.6014185726642608 | loss/mean: 0.6014185726642608 | loss/std: 0.0552712114689829 | lr: 0.02 | momentum: 0.9
valid (12/15) loss: 0.5754175305366516 | loss/mean: 0.5754175305366516 | loss/std: 0.03854847246400713 | lr: 0.02 | momentum: 0.9
* Epoch (12/15) 
train (13/15) loss: 0.6903188705444336 | loss/mean: 0.6903188705444336 | loss/std: 0.116254050330296 | lr: 0.02 | momentum: 0.9
valid (13/15) loss: 0.6574488282203674 |

valid (9/15) loss: 0.6176867604255676 | loss/mean: 0.6176867604255676 | loss/std: 0.09411068212754559 | lr: 0.02 | momentum: 0.9
* Epoch (9/15) 
train (10/15) loss: 0.6614581763744355 | loss/mean: 0.6614581763744355 | loss/std: 0.07863462102037609 | lr: 0.02 | momentum: 0.9
valid (10/15) loss: 0.6017969965934753 | loss/mean: 0.6017969965934753 | loss/std: 0.04961737308676473 | lr: 0.02 | momentum: 0.9
* Epoch (10/15) 
train (11/15) loss: 0.6047964811325073 | loss/mean: 0.6047964811325073 | loss/std: 0.053836304677360516 | lr: 0.02 | momentum: 0.9
valid (11/15) loss: 0.5842337131500244 | loss/mean: 0.5842337131500244 | loss/std: 0.037322183113070805 | lr: 0.02 | momentum: 0.9
* Epoch (11/15) 
train (12/15) loss: 0.6014185726642608 | loss/mean: 0.6014185726642608 | loss/std: 0.0552712114689829 | lr: 0.02 | momentum: 0.9
valid (12/15) loss: 0.5754175305366516 | loss/mean: 0.5754175305366516 | loss/std: 0.03854847246400713 | lr: 0.02 | momentum: 0.9
* Epoch (12/15) 
train (13/15) loss: 0.6

* Epoch (8/15) 
train (9/15) loss: 0.6144924342632293 | loss/mean: 0.6144924342632293 | loss/std: 0.06580485853316159 | lr: 0.02 | momentum: 0.9
valid (9/15) loss: 0.6176867604255676 | loss/mean: 0.6176867604255676 | loss/std: 0.09411068212754559 | lr: 0.02 | momentum: 0.9
* Epoch (9/15) 
train (10/15) loss: 0.6614581763744355 | loss/mean: 0.6614581763744355 | loss/std: 0.07863462102037609 | lr: 0.02 | momentum: 0.9
valid (10/15) loss: 0.6017969965934753 | loss/mean: 0.6017969965934753 | loss/std: 0.04961737308676473 | lr: 0.02 | momentum: 0.9
* Epoch (10/15) 
train (11/15) loss: 0.6047964811325073 | loss/mean: 0.6047964811325073 | loss/std: 0.053836304677360516 | lr: 0.02 | momentum: 0.9
valid (11/15) loss: 0.5842337131500244 | loss/mean: 0.5842337131500244 | loss/std: 0.037322183113070805 | lr: 0.02 | momentum: 0.9
* Epoch (11/15) 
train (12/15) loss: 0.6014185726642608 | loss/mean: 0.6014185726642608 | loss/std: 0.0552712114689829 | lr: 0.02 | momentum: 0.9
valid (12/15) loss: 0.575

train (9/15) loss: 0.90144482254982 | loss/mean: 0.90144482254982 | loss/std: 0.10298571081669804 | lr: 0.02 | momentum: 0.9
valid (9/15) loss: 0.8514448404312134 | loss/mean: 0.8514448404312134 | loss/std: 0.20203053299435775 | lr: 0.02 | momentum: 0.9
* Epoch (9/15) 
train (10/15) loss: 0.90144482254982 | loss/mean: 0.90144482254982 | loss/std: 0.10298571081669804 | lr: 0.02 | momentum: 0.9
valid (10/15) loss: 0.8514448404312134 | loss/mean: 0.8514448404312134 | loss/std: 0.20203053299435775 | lr: 0.02 | momentum: 0.9
* Epoch (10/15) 
train (11/15) loss: 0.90144482254982 | loss/mean: 0.90144482254982 | loss/std: 0.10298571081669804 | lr: 0.02 | momentum: 0.9
valid (11/15) loss: 0.8514448404312134 | loss/mean: 0.8514448404312134 | loss/std: 0.20203053299435775 | lr: 0.02 | momentum: 0.9
* Epoch (11/15) 
train (12/15) loss: 0.90144482254982 | loss/mean: 0.90144482254982 | loss/std: 0.10298571081669804 | lr: 0.02 | momentum: 0.9
valid (12/15) loss: 0.8514448404312134 | loss/mean: 0.8514

* Epoch (8/15) 
train (9/15) loss: 0.90144482254982 | loss/mean: 0.90144482254982 | loss/std: 0.10298571081669804 | lr: 0.02 | momentum: 0.9
valid (9/15) loss: 0.8514448404312134 | loss/mean: 0.8514448404312134 | loss/std: 0.20203053299435775 | lr: 0.02 | momentum: 0.9
* Epoch (9/15) 
train (10/15) loss: 0.90144482254982 | loss/mean: 0.90144482254982 | loss/std: 0.10298571081669804 | lr: 0.02 | momentum: 0.9
valid (10/15) loss: 0.8514448404312134 | loss/mean: 0.8514448404312134 | loss/std: 0.20203053299435775 | lr: 0.02 | momentum: 0.9
* Epoch (10/15) 
train (11/15) loss: 0.90144482254982 | loss/mean: 0.90144482254982 | loss/std: 0.10298571081669804 | lr: 0.02 | momentum: 0.9
valid (11/15) loss: 0.8514448404312134 | loss/mean: 0.8514448404312134 | loss/std: 0.20203053299435775 | lr: 0.02 | momentum: 0.9
* Epoch (11/15) 
train (12/15) loss: 0.90144482254982 | loss/mean: 0.90144482254982 | loss/std: 0.10298571081669804 | lr: 0.02 | momentum: 0.9
valid (12/15) loss: 0.8514448404312134 | l

valid (8/15) loss: 0.6061147689819336 | loss/mean: 0.6061147689819336 | loss/std: 0.055619773470621185 | lr: 0.02 | momentum: 0.9
* Epoch (8/15) 
train (9/15) loss: 0.6041605830192566 | loss/mean: 0.6041605830192566 | loss/std: 0.05819609263122187 | lr: 0.02 | momentum: 0.9
valid (9/15) loss: 0.5867648720741272 | loss/mean: 0.5867648720741272 | loss/std: 0.041162953602606994 | lr: 0.02 | momentum: 0.9
* Epoch (9/15) 
train (10/15) loss: 0.5989545941352844 | loss/mean: 0.5989545941352844 | loss/std: 0.08411112414268375 | lr: 0.02 | momentum: 0.9
valid (10/15) loss: 0.5802655696868897 | loss/mean: 0.5802655696868897 | loss/std: 0.04272341354807883 | lr: 0.02 | momentum: 0.9
* Epoch (10/15) 
train (11/15) loss: 0.6042952239513397 | loss/mean: 0.6042952239513397 | loss/std: 0.06178392668995883 | lr: 0.02 | momentum: 0.9
valid (11/15) loss: 0.6025177717208863 | loss/mean: 0.6025177717208863 | loss/std: 0.04257852859964991 | lr: 0.02 | momentum: 0.9
* Epoch (11/15) 
train (12/15) loss: 0.620

* Epoch (7/15) 
train (8/15) loss: 0.6014527440071106 | loss/mean: 0.6014527440071106 | loss/std: 0.058445254991713984 | lr: 0.02 | momentum: 0.9
valid (8/15) loss: 0.563526964187622 | loss/mean: 0.563526964187622 | loss/std: 0.018749990192651268 | lr: 0.02 | momentum: 0.9
* Epoch (8/15) 
train (9/15) loss: 0.6052125692367554 | loss/mean: 0.6052125692367554 | loss/std: 0.06182171864756872 | lr: 0.02 | momentum: 0.9
valid (9/15) loss: 0.5717703342437744 | loss/mean: 0.5717703342437744 | loss/std: 0.03383578998776456 | lr: 0.02 | momentum: 0.9
* Epoch (9/15) 
train (10/15) loss: 0.6179565489292145 | loss/mean: 0.6179565489292145 | loss/std: 0.07286468349646136 | lr: 0.02 | momentum: 0.9
valid (10/15) loss: 0.5763066053390503 | loss/mean: 0.5763066053390503 | loss/std: 0.03722513783244443 | lr: 0.02 | momentum: 0.9
* Epoch (10/15) 
train (11/15) loss: 0.6223597288131714 | loss/mean: 0.6223597288131714 | loss/std: 0.07773875080767581 | lr: 0.02 | momentum: 0.9
valid (11/15) loss: 0.5837981

valid (7/15) loss: 0.5635541439056396 | loss/mean: 0.5635541439056396 | loss/std: 0.012863210959208754 | lr: 0.02 | momentum: 0.9
* Epoch (7/15) 
train (8/15) loss: 0.6014527440071106 | loss/mean: 0.6014527440071106 | loss/std: 0.058445254991713984 | lr: 0.02 | momentum: 0.9
valid (8/15) loss: 0.563526964187622 | loss/mean: 0.563526964187622 | loss/std: 0.018749990192651268 | lr: 0.02 | momentum: 0.9
* Epoch (8/15) 
train (9/15) loss: 0.6052125692367554 | loss/mean: 0.6052125692367554 | loss/std: 0.06182171864756872 | lr: 0.02 | momentum: 0.9
valid (9/15) loss: 0.5717703342437744 | loss/mean: 0.5717703342437744 | loss/std: 0.03383578998776456 | lr: 0.02 | momentum: 0.9
* Epoch (9/15) 
train (10/15) loss: 0.6179565489292145 | loss/mean: 0.6179565489292145 | loss/std: 0.07286468349646136 | lr: 0.02 | momentum: 0.9
valid (10/15) loss: 0.5763066053390503 | loss/mean: 0.5763066053390503 | loss/std: 0.03722513783244443 | lr: 0.02 | momentum: 0.9
* Epoch (10/15) 
train (11/15) loss: 0.6223597

train (7/15) loss: 0.7100435137748718 | loss/mean: 0.7100435137748718 | loss/std: 0.11772137731558888 | lr: 0.02 | momentum: 0.9
valid (7/15) loss: 0.5635541439056396 | loss/mean: 0.5635541439056396 | loss/std: 0.012863210959208754 | lr: 0.02 | momentum: 0.9
* Epoch (7/15) 
train (8/15) loss: 0.6014527440071106 | loss/mean: 0.6014527440071106 | loss/std: 0.058445254991713984 | lr: 0.02 | momentum: 0.9
valid (8/15) loss: 0.563526964187622 | loss/mean: 0.563526964187622 | loss/std: 0.018749990192651268 | lr: 0.02 | momentum: 0.9
* Epoch (8/15) 
train (9/15) loss: 0.6052125692367554 | loss/mean: 0.6052125692367554 | loss/std: 0.06182171864756872 | lr: 0.02 | momentum: 0.9
valid (9/15) loss: 0.5717703342437744 | loss/mean: 0.5717703342437744 | loss/std: 0.03383578998776456 | lr: 0.02 | momentum: 0.9
* Epoch (9/15) 
train (10/15) loss: 0.6179565489292145 | loss/mean: 0.6179565489292145 | loss/std: 0.07286468349646136 | lr: 0.02 | momentum: 0.9
valid (10/15) loss: 0.5763066053390503 | loss/m

valid (6/15) loss: 0.6803253412246704 | loss/mean: 0.6803253412246704 | loss/std: 0.10821824916058906 | lr: 0.02 | momentum: 0.9
* Epoch (6/15) 
train (7/15) loss: 0.8822710573673248 | loss/mean: 0.8822710573673248 | loss/std: 0.17546851867189842 | lr: 0.02 | momentum: 0.9
valid (7/15) loss: 0.9351367235183716 | loss/mean: 0.9351367235183716 | loss/std: 0.16341628223685886 | lr: 0.02 | momentum: 0.9
* Epoch (7/15) 
train (8/15) loss: 0.9167794942855835 | loss/mean: 0.9167794942855835 | loss/std: 0.11858644727988539 | lr: 0.02 | momentum: 0.9
valid (8/15) loss: 0.8714571833610535 | loss/mean: 0.8714571833610535 | loss/std: 0.1738030979490245 | lr: 0.02 | momentum: 0.9
* Epoch (8/15) 
train (9/15) loss: 0.8914557635784149 | loss/mean: 0.8914557635784149 | loss/std: 0.13631870229564652 | lr: 0.02 | momentum: 0.9
valid (9/15) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (9/15) 
train (10/15) loss: 0.89144489169120

valid (6/15) loss: 0.6803253412246704 | loss/mean: 0.6803253412246704 | loss/std: 0.10821824916058906 | lr: 0.02 | momentum: 0.9
* Epoch (6/15) 
train (7/15) loss: 0.8822710573673248 | loss/mean: 0.8822710573673248 | loss/std: 0.17546851867189842 | lr: 0.02 | momentum: 0.9
valid (7/15) loss: 0.9351367235183716 | loss/mean: 0.9351367235183716 | loss/std: 0.16341628223685886 | lr: 0.02 | momentum: 0.9
* Epoch (7/15) 
train (8/15) loss: 0.9167794942855835 | loss/mean: 0.9167794942855835 | loss/std: 0.11858644727988539 | lr: 0.02 | momentum: 0.9
valid (8/15) loss: 0.8714571833610535 | loss/mean: 0.8714571833610535 | loss/std: 0.1738030979490245 | lr: 0.02 | momentum: 0.9
* Epoch (8/15) 
train (9/15) loss: 0.8914557635784149 | loss/mean: 0.8914557635784149 | loss/std: 0.13631870229564652 | lr: 0.02 | momentum: 0.9
valid (9/15) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (9/15) 
train (10/15) loss: 0.89144489169120

train (6/15) loss: 0.6711041688919068 | loss/mean: 0.6711041688919068 | loss/std: 0.0769613015690868 | lr: 0.02 | momentum: 0.9
valid (6/15) loss: 0.6197689890861511 | loss/mean: 0.6197689890861511 | loss/std: 0.06443913257775774 | lr: 0.02 | momentum: 0.9
* Epoch (6/15) 
train (7/15) loss: 0.6035266041755676 | loss/mean: 0.6035266041755676 | loss/std: 0.054670576017562075 | lr: 0.02 | momentum: 0.9
valid (7/15) loss: 0.5845841646194458 | loss/mean: 0.5845841646194458 | loss/std: 0.04648862328188689 | lr: 0.02 | momentum: 0.9
* Epoch (7/15) 
train (8/15) loss: 0.6060123860836031 | loss/mean: 0.6060123860836031 | loss/std: 0.051312506106386165 | lr: 0.02 | momentum: 0.9
valid (8/15) loss: 0.5836203932762146 | loss/mean: 0.5836203932762146 | loss/std: 0.036846705007915 | lr: 0.02 | momentum: 0.9
* Epoch (8/15) 
train (9/15) loss: 0.6508775591850281 | loss/mean: 0.6508775591850281 | loss/std: 0.08760060875259433 | lr: 0.02 | momentum: 0.9
valid (9/15) loss: 0.594603669643402 | loss/mean: 

valid (5/15) loss: 0.5766886591911315 | loss/mean: 0.5766886591911315 | loss/std: 0.0234025586062961 | lr: 0.02 | momentum: 0.9
* Epoch (5/15) 
train (6/15) loss: 0.6621983528137206 | loss/mean: 0.6621983528137206 | loss/std: 0.08912734240616431 | lr: 0.02 | momentum: 0.9
valid (6/15) loss: 0.5820417881011963 | loss/mean: 0.5820417881011963 | loss/std: 0.029262240779238606 | lr: 0.02 | momentum: 0.9
* Epoch (6/15) 
train (7/15) loss: 0.5877450048923493 | loss/mean: 0.5877450048923493 | loss/std: 0.043609981401272306 | lr: 0.02 | momentum: 0.9
valid (7/15) loss: 0.5792671799659729 | loss/mean: 0.5792671799659729 | loss/std: 0.044917618731740556 | lr: 0.02 | momentum: 0.9
* Epoch (7/15) 
train (8/15) loss: 0.6001888394355774 | loss/mean: 0.6001888394355774 | loss/std: 0.05564050571517543 | lr: 0.02 | momentum: 0.9
valid (8/15) loss: 0.5769888281822204 | loss/mean: 0.5769888281822204 | loss/std: 0.0374247897311591 | lr: 0.02 | momentum: 0.9
* Epoch (8/15) 
train (9/15) loss: 0.62069837450

train (5/15) loss: 0.6024005174636842 | loss/mean: 0.6024005174636842 | loss/std: 0.05300498809937561 | lr: 0.02 | momentum: 0.9
valid (5/15) loss: 0.5836950540542603 | loss/mean: 0.5836950540542603 | loss/std: 0.06236310409502967 | lr: 0.02 | momentum: 0.9
* Epoch (5/15) 
train (6/15) loss: 0.6252368688583374 | loss/mean: 0.6252368688583374 | loss/std: 0.07458216146448958 | lr: 0.02 | momentum: 0.9
valid (6/15) loss: 0.5941004037857056 | loss/mean: 0.5941004037857056 | loss/std: 0.07941775973079936 | lr: 0.02 | momentum: 0.9
* Epoch (6/15) 
train (7/15) loss: 0.6571373105049132 | loss/mean: 0.6571373105049132 | loss/std: 0.08248030075185284 | lr: 0.02 | momentum: 0.9
valid (7/15) loss: 0.5843241095542908 | loss/mean: 0.5843241095542908 | loss/std: 0.026584986424018634 | lr: 0.02 | momentum: 0.9
* Epoch (7/15) 
train (8/15) loss: 0.631271094083786 | loss/mean: 0.631271094083786 | loss/std: 0.08448892933133195 | lr: 0.02 | momentum: 0.9
valid (8/15) loss: 0.5763566374778748 | loss/mean:

valid (4/15) loss: 0.5796833157539367 | loss/mean: 0.5796833157539367 | loss/std: 0.04467413176750997 | lr: 0.02 | momentum: 0.9
* Epoch (4/15) 
train (5/15) loss: 0.6024005174636842 | loss/mean: 0.6024005174636842 | loss/std: 0.05300498809937561 | lr: 0.02 | momentum: 0.9
valid (5/15) loss: 0.5836950540542603 | loss/mean: 0.5836950540542603 | loss/std: 0.06236310409502967 | lr: 0.02 | momentum: 0.9
* Epoch (5/15) 
train (6/15) loss: 0.6252368688583374 | loss/mean: 0.6252368688583374 | loss/std: 0.07458216146448958 | lr: 0.02 | momentum: 0.9
valid (6/15) loss: 0.5941004037857056 | loss/mean: 0.5941004037857056 | loss/std: 0.07941775973079936 | lr: 0.02 | momentum: 0.9
* Epoch (6/15) 
train (7/15) loss: 0.6571373105049132 | loss/mean: 0.6571373105049132 | loss/std: 0.08248030075185284 | lr: 0.02 | momentum: 0.9
valid (7/15) loss: 0.5843241095542908 | loss/mean: 0.5843241095542908 | loss/std: 0.026584986424018634 | lr: 0.02 | momentum: 0.9
* Epoch (7/15) 
train (8/15) loss: 0.63127109408

train (4/15) loss: 0.700477123260498 | loss/mean: 0.700477123260498 | loss/std: 0.0682356333582729 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.7035794734954834 | loss/mean: 0.7035794734954834 | loss/std: 0.11378249755304724 | lr: 0.02 | momentum: 0.9
* Epoch (4/15) 
train (5/15) loss: 0.656436961889267 | loss/mean: 0.656436961889267 | loss/std: 0.06480632431264428 | lr: 0.02 | momentum: 0.9
valid (5/15) loss: 0.6025115251541138 | loss/mean: 0.6025115251541138 | loss/std: 0.07895939044384473 | lr: 0.02 | momentum: 0.9
* Epoch (5/15) 
train (6/15) loss: 0.6181727170944213 | loss/mean: 0.6181727170944213 | loss/std: 0.07568742996590772 | lr: 0.02 | momentum: 0.9
valid (6/15) loss: 0.635368812084198 | loss/mean: 0.635368812084198 | loss/std: 0.07558737341729933 | lr: 0.02 | momentum: 0.9
* Epoch (6/15) 
train (7/15) loss: 0.6057707071304321 | loss/mean: 0.6057707071304321 | loss/std: 0.036642477747455264 | lr: 0.02 | momentum: 0.9
valid (7/15) loss: 0.6350426077842712 | loss/mean: 0.63

valid (3/15) loss: 0.6645171165466308 | loss/mean: 0.6645171165466308 | loss/std: 0.10326882622108619 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.700477123260498 | loss/mean: 0.700477123260498 | loss/std: 0.0682356333582729 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.7035794734954834 | loss/mean: 0.7035794734954834 | loss/std: 0.11378249755304724 | lr: 0.02 | momentum: 0.9
* Epoch (4/15) 
train (5/15) loss: 0.656436961889267 | loss/mean: 0.656436961889267 | loss/std: 0.06480632431264428 | lr: 0.02 | momentum: 0.9
valid (5/15) loss: 0.6025115251541138 | loss/mean: 0.6025115251541138 | loss/std: 0.07895939044384473 | lr: 0.02 | momentum: 0.9
* Epoch (5/15) 
train (6/15) loss: 0.6181727170944213 | loss/mean: 0.6181727170944213 | loss/std: 0.07568742996590772 | lr: 0.02 | momentum: 0.9
valid (6/15) loss: 0.635368812084198 | loss/mean: 0.635368812084198 | loss/std: 0.07558737341729933 | lr: 0.02 | momentum: 0.9
* Epoch (6/15) 
train (7/15) loss: 0.6057707071304321 | 

train (3/15) loss: 0.8183606266975403 | loss/mean: 0.8183606266975403 | loss/std: 0.06543725816909322 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.7663802981376648 | loss/mean: 0.7663802981376648 | loss/std: 0.10648789633847182 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.7561877787113189 | loss/mean: 0.7561877787113189 | loss/std: 0.04143847763230471 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.6840553045272827 | loss/mean: 0.6840553045272827 | loss/std: 0.0726919455329412 | lr: 0.02 | momentum: 0.9
* Epoch (4/15) 
train (5/15) loss: 0.6429982304573059 | loss/mean: 0.6429982304573059 | loss/std: 0.059384268387501646 | lr: 0.02 | momentum: 0.9
valid (5/15) loss: 0.6657360196113586 | loss/mean: 0.6657360196113586 | loss/std: 0.0934438545773521 | lr: 0.02 | momentum: 0.9
* Epoch (5/15) 
train (6/15) loss: 0.6707981169223786 | loss/mean: 0.6707981169223786 | loss/std: 0.13466146690272285 | lr: 0.02 | momentum: 0.9
valid (6/15) loss: 0.6585146427154541 | loss/mean:

valid (2/15) loss: 0.7868466377258301 | loss/mean: 0.7868466377258301 | loss/std: 0.08596554689018426 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.8183606266975403 | loss/mean: 0.8183606266975403 | loss/std: 0.06543725816909322 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.7663802981376648 | loss/mean: 0.7663802981376648 | loss/std: 0.10648789633847182 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.7561877787113189 | loss/mean: 0.7561877787113189 | loss/std: 0.04143847763230471 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.6840553045272827 | loss/mean: 0.6840553045272827 | loss/std: 0.0726919455329412 | lr: 0.02 | momentum: 0.9
* Epoch (4/15) 
train (5/15) loss: 0.6429982304573059 | loss/mean: 0.6429982304573059 | loss/std: 0.059384268387501646 | lr: 0.02 | momentum: 0.9
valid (5/15) loss: 0.6657360196113586 | loss/mean: 0.6657360196113586 | loss/std: 0.0934438545773521 | lr: 0.02 | momentum: 0.9
* Epoch (5/15) 
train (6/15) loss: 0.6707981169223

* Epoch (1/15) 
train (2/15) loss: 0.8920239806175232 | loss/mean: 0.8920239806175232 | loss/std: 0.13017456431361912 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.8763436436653137 | loss/mean: 0.8763436436653137 | loss/std: 0.1734367152791952 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.8924040913581849 | loss/mean: 0.8924040913581849 | loss/std: 0.13538482167846225 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.9045943498611451 | loss/mean: 0.9045943498611451 | loss/std: 0.14932896946011695 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.9587921023368835 | loss/mean: 0.9587921023368835 | loss/std: 0.16974278064764686 | lr: 0.02 | momentum: 0.9
* Epoch (4/15) 
train (5/15) loss: 0.9625600814819337 | loss/mean: 0.9625600814819337 | loss/std: 0.1323225443970886 | lr: 0.02 | momentum: 0.9
valid (5/15) loss: 0.871444833278656 

* Epoch (1/15) 
train (2/15) loss: 0.8920239806175232 | loss/mean: 0.8920239806175232 | loss/std: 0.13017456431361912 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.8763436436653137 | loss/mean: 0.8763436436653137 | loss/std: 0.1734367152791952 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.8924040913581849 | loss/mean: 0.8924040913581849 | loss/std: 0.13538482167846225 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.9045943498611451 | loss/mean: 0.9045943498611451 | loss/std: 0.14932896946011695 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.9587921023368835 | loss/mean: 0.9587921023368835 | loss/std: 0.16974278064764686 | lr: 0.02 | momentum: 0.9
* Epoch (4/15) 
train (5/15) loss: 0.9625600814819337 | loss/mean: 0.9625600814819337 | loss/std: 0.1323225443970886 | lr: 0.02 | momentum: 0.9
valid (5/15) loss: 0.871444833278656 

* Epoch (1/15) 
train (2/15) loss: 0.8920239806175232 | loss/mean: 0.8920239806175232 | loss/std: 0.13017456431361912 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.8763436436653137 | loss/mean: 0.8763436436653137 | loss/std: 0.1734367152791952 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.8924040913581849 | loss/mean: 0.8924040913581849 | loss/std: 0.13538482167846225 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.9045943498611451 | loss/mean: 0.9045943498611451 | loss/std: 0.14932896946011695 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.9587921023368835 | loss/mean: 0.9587921023368835 | loss/std: 0.16974278064764686 | lr: 0.02 | momentum: 0.9
* Epoch (4/15) 
train (5/15) loss: 0.9625600814819337 | loss/mean: 0.9625600814819337 | loss/std: 0.1323225443970886 | lr: 0.02 | momentum: 0.9
valid (5/15) loss: 0.871444833278656 

train (2/15) loss: 0.7902003407478333 | loss/mean: 0.7902003407478333 | loss/std: 0.04512313770435682 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.6945424675941467 | loss/mean: 0.6945424675941467 | loss/std: 0.05344829434765063 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.6738126337528229 | loss/mean: 0.6738126337528229 | loss/std: 0.03968113412377241 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.67219477891922 | loss/mean: 0.67219477891922 | loss/std: 0.089896773924014 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.6389274835586548 | loss/mean: 0.6389274835586548 | loss/std: 0.04248619972120637 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.6496837854385376 | loss/mean: 0.6496837854385376 | loss/std: 0.0844563936888784 | lr: 0.02 | momentum: 0.9
* Epoch (4/15) 
train (5/15) loss: 0.6168635547161103 | loss/mean: 0.6168635547161103 | loss/std: 0.04872726704321228 | lr: 0.02 | momentum: 0.9
valid (5/15) loss: 0.6373286604881286 | loss/mean: 0.637

train (2/15) loss: 0.7307303607463836 | loss/mean: 0.7307303607463836 | loss/std: 0.06861286570180088 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.6297721862792969 | loss/mean: 0.6297721862792969 | loss/std: 0.050385939321268995 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.6391199231147766 | loss/mean: 0.6391199231147766 | loss/std: 0.08075451185277889 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.5829273939132691 | loss/mean: 0.5829273939132691 | loss/std: 0.06011829779170995 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.6247496962547301 | loss/mean: 0.6247496962547301 | loss/std: 0.07744842463067152 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.7355736374855042 | loss/mean: 0.7355736374855042 | loss/std: 0.07006853138376212 | lr: 0.02 | momentum: 0.9
* Epoch (4/15) 
train (5/15) loss: 0.7525958776474 | loss/mean: 0.7525958776474 | loss/std: 0.09900264926038298 | lr: 0.02 | momentum: 0.9
valid (5/15) loss: 0.6291339635848999 | loss/mean: 0.6

train (2/15) loss: 0.7307303607463836 | loss/mean: 0.7307303607463836 | loss/std: 0.06861286570180088 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.6297721862792969 | loss/mean: 0.6297721862792969 | loss/std: 0.050385939321268995 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.6391199231147766 | loss/mean: 0.6391199231147766 | loss/std: 0.08075451185277889 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.5829273939132691 | loss/mean: 0.5829273939132691 | loss/std: 0.06011829779170995 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.6247496962547301 | loss/mean: 0.6247496962547301 | loss/std: 0.07744842463067152 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.7355736374855042 | loss/mean: 0.7355736374855042 | loss/std: 0.07006853138376212 | lr: 0.02 | momentum: 0.9
* Epoch (4/15) 
train (5/15) loss: 0.7525958776474 | loss/mean: 0.7525958776474 | loss/std: 0.09900264926038298 | lr: 0.02 | momentum: 0.9
valid (5/15) loss: 0.6291339635848999 | loss/mean: 0.6

valid (1/15) loss: 0.8688402295112609 | loss/mean: 0.8688402295112609 | loss/std: 0.09507685258712488 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.8550123989582061 | loss/mean: 0.8550123989582061 | loss/std: 0.0674731395429997 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.7809401988983155 | loss/mean: 0.7809401988983155 | loss/std: 0.06854020607269162 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.9268040537834167 | loss/mean: 0.9268040537834167 | loss/std: 0.20257802508167097 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.8369949221611023 | loss/mean: 0.8369949221611023 | loss/std: 0.18706292519967752 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.9769105494022369 | loss/mean: 0.9769105494022369 | loss/std: 0.174935735828079 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 1.231253707408905 | loss/mean: 1.231253707408905 | loss/std: 0.17355106860037217 | lr: 0.02 | momentum: 0.9
* Epoch (4/15) 
train (5/15) loss: 1.2114430367946623 

train (1/15) loss: 1.0715902030467988 | loss/mean: 1.0715902030467988 | loss/std: 0.046043627208974806 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 0.8688402295112609 | loss/mean: 0.8688402295112609 | loss/std: 0.09507685258712488 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.8550123989582061 | loss/mean: 0.8550123989582061 | loss/std: 0.0674731395429997 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.7809401988983155 | loss/mean: 0.7809401988983155 | loss/std: 0.06854020607269162 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.9268040537834167 | loss/mean: 0.9268040537834167 | loss/std: 0.20257802508167097 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.8369949221611023 | loss/mean: 0.8369949221611023 | loss/std: 0.18706292519967752 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.9769105494022369 | loss/mean: 0.9769105494022369 | loss/std: 0.174935735828079 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 1.231253707408905 | loss/mean: 1

train (1/15) loss: 1.0715902030467988 | loss/mean: 1.0715902030467988 | loss/std: 0.046043627208974806 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 0.8688402295112609 | loss/mean: 0.8688402295112609 | loss/std: 0.09507685258712488 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.8550123989582061 | loss/mean: 0.8550123989582061 | loss/std: 0.0674731395429997 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.7809401988983155 | loss/mean: 0.7809401988983155 | loss/std: 0.06854020607269162 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.9268040537834167 | loss/mean: 0.9268040537834167 | loss/std: 0.20257802508167097 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.8369949221611023 | loss/mean: 0.8369949221611023 | loss/std: 0.18706292519967752 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.9769105494022369 | loss/mean: 0.9769105494022369 | loss/std: 0.174935735828079 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 1.231253707408905 | loss/mean: 1

train (1/15) loss: 1.0271628141403197 | loss/mean: 1.0271628141403197 | loss/std: 0.08011533987674933 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 0.8024430990219116 | loss/mean: 0.8024430990219116 | loss/std: 0.13417336767851695 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.850147432088852 | loss/mean: 0.850147432088852 | loss/std: 0.09207361759520463 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.8253880858421325 | loss/mean: 0.8253880858421325 | loss/std: 0.18007514664912297 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.8473841190338136 | loss/mean: 0.8473841190338136 | loss/std: 0.06495973146357793 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.779738438129425 | loss/mean: 0.779738438129425 | loss/std: 0.11971969210784698 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.7665765106678009 | loss/mean: 0.7665765106678009 | loss/std: 0.04829552452845042 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.6659675359725952 | loss/mean: 0.

train (1/15) loss: 1.0271628141403197 | loss/mean: 1.0271628141403197 | loss/std: 0.08011533987674933 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 0.8024430990219116 | loss/mean: 0.8024430990219116 | loss/std: 0.13417336767851695 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.850147432088852 | loss/mean: 0.850147432088852 | loss/std: 0.09207361759520463 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.8253880858421325 | loss/mean: 0.8253880858421325 | loss/std: 0.18007514664912297 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.8473841190338136 | loss/mean: 0.8473841190338136 | loss/std: 0.06495973146357793 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.779738438129425 | loss/mean: 0.779738438129425 | loss/std: 0.11971969210784698 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.7665765106678009 | loss/mean: 0.7665765106678009 | loss/std: 0.04829552452845042 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.6659675359725952 | loss/mean: 0.

* Epoch (15/15) 
Top best models:
logs/trial_173/train.8.pth	0.5769
train (1/15) loss: 1.0271628141403197 | loss/mean: 1.0271628141403197 | loss/std: 0.08011533987674933 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 0.8024430990219116 | loss/mean: 0.8024430990219116 | loss/std: 0.13417336767851695 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.850147432088852 | loss/mean: 0.850147432088852 | loss/std: 0.09207361759520463 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.8253880858421325 | loss/mean: 0.8253880858421325 | loss/std: 0.18007514664912297 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.8473841190338136 | loss/mean: 0.8473841190338136 | loss/std: 0.06495973146357793 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.779738438129425 | loss/mean: 0.779738438129425 | loss/std: 0.11971969210784698 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.7665765106678009 | loss/mean: 0.7665765106678009 | loss/std: 0.04829552452845042 | lr: 0.02 |

valid (15/15) loss: 0.6104251623153687 | loss/mean: 0.6104251623153687 | loss/std: 0.03338658504189894 | lr: 0.02 | momentum: 0.9
* Epoch (15/15) 
Top best models:
logs/trial_175/train.15.pth	0.6104
train (1/15) loss: 0.9638923227787017 | loss/mean: 0.9638923227787017 | loss/std: 0.10752718882833515 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 0.7577644467353821 | loss/mean: 0.7577644467353821 | loss/std: 0.10374529008421728 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.7335935950279237 | loss/mean: 0.7335935950279237 | loss/std: 0.055957445808222886 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.6460588335990906 | loss/mean: 0.6460588335990906 | loss/std: 0.06000162392362312 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.6383655548095704 | loss/mean: 0.6383655548095704 | loss/std: 0.050545682172800914 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.6371726155281067 | loss/mean: 0.6371726155281067 | loss/std: 0.07181789562613757 | lr: 0.02 | momentu

train (15/15) loss: 0.5955860555171968 | loss/mean: 0.5955860555171968 | loss/std: 0.07650540614320352 | lr: 0.02 | momentum: 0.9
valid (15/15) loss: 0.5755530595779419 | loss/mean: 0.5755530595779419 | loss/std: 0.04530276705187019 | lr: 0.02 | momentum: 0.9
* Epoch (15/15) 
Top best models:
logs/trial_177/train.11.pth	0.5739
train (1/15) loss: 1.0976144194602966 | loss/mean: 1.0976144194602966 | loss/std: 0.02165851032725488 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 1.0855967283248902 | loss/mean: 1.0855967283248902 | loss/std: 0.020967439072034997 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 1.0729287147521973 | loss/mean: 1.0729287147521973 | loss/std: 0.017232221743970863 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 1.0440883398056031 | loss/mean: 1.0440883398056031 | loss/std: 0.020057703468819417 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 1.0166244387626648 | loss/mean: 1.0166244387626648 | loss/std: 0.02730380586335352 | lr: 0.02 | momen

valid (14/15) loss: 0.5863481879234314 | loss/mean: 0.5863481879234314 | loss/std: 0.037881709869894455 | lr: 0.02 | momentum: 0.9
* Epoch (14/15) 
train (15/15) loss: 0.6168371737003326 | loss/mean: 0.6168371737003326 | loss/std: 0.062882851553149 | lr: 0.02 | momentum: 0.9
valid (15/15) loss: 0.5930473923683166 | loss/mean: 0.5930473923683166 | loss/std: 0.03417190896558172 | lr: 0.02 | momentum: 0.9
* Epoch (15/15) 
Top best models:
logs/trial_179/train.14.pth	0.5863
train (1/15) loss: 1.107542097568512 | loss/mean: 1.107542097568512 | loss/std: 0.034116007683033266 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 1.0886850118637086 | loss/mean: 1.0886850118637086 | loss/std: 0.016755348794917625 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.9896030485630035 | loss/mean: 0.9896030485630035 | loss/std: 0.09960465109389217 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.7803579568862915 | loss/mean: 0.7803579568862915 | loss/std: 0.13944562564100874 | lr: 0.02 | momentu

train (14/15) loss: 1.2414449214935306 | loss/mean: 1.2414449214935306 | loss/std: 0.1452966486891931 | lr: 0.02 | momentum: 0.9
valid (14/15) loss: 1.1714448928833008 | loss/mean: 1.1714448928833008 | loss/std: 0.18516406672882949 | lr: 0.02 | momentum: 0.9
* Epoch (14/15) 
train (15/15) loss: 1.2414449214935306 | loss/mean: 1.2414449214935306 | loss/std: 0.1452966486891931 | lr: 0.02 | momentum: 0.9
valid (15/15) loss: 1.1714448928833008 | loss/mean: 1.1714448928833008 | loss/std: 0.18516406672882949 | lr: 0.02 | momentum: 0.9
* Epoch (15/15) 
Top best models:
logs/trial_181/train.3.pth	0.6926
train (1/15) loss: 1.107542097568512 | loss/mean: 1.107542097568512 | loss/std: 0.034116007683033266 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 1.0886850118637086 | loss/mean: 1.0886850118637086 | loss/std: 0.016755348794917625 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.9896030485630035 | loss/mean: 0.9896030485630035 | loss/std: 0.09960465109389217 | lr: 0.02 | momentum

train (14/15) loss: 1.2414449214935306 | loss/mean: 1.2414449214935306 | loss/std: 0.1452966486891931 | lr: 0.02 | momentum: 0.9
valid (14/15) loss: 1.1714448928833008 | loss/mean: 1.1714448928833008 | loss/std: 0.18516406672882949 | lr: 0.02 | momentum: 0.9
* Epoch (14/15) 
train (15/15) loss: 1.2414449214935306 | loss/mean: 1.2414449214935306 | loss/std: 0.1452966486891931 | lr: 0.02 | momentum: 0.9
valid (15/15) loss: 1.1714448928833008 | loss/mean: 1.1714448928833008 | loss/std: 0.18516406672882949 | lr: 0.02 | momentum: 0.9
* Epoch (15/15) 
Top best models:
logs/trial_183/train.3.pth	0.6926
train (1/15) loss: 1.107542097568512 | loss/mean: 1.107542097568512 | loss/std: 0.034116007683033266 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 1.0886850118637086 | loss/mean: 1.0886850118637086 | loss/std: 0.016755348794917625 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.9896030485630035 | loss/mean: 0.9896030485630035 | loss/std: 0.09960465109389217 | lr: 0.02 | momentum

valid (13/15) loss: 0.5731709957122803 | loss/mean: 0.5731709957122803 | loss/std: 0.04299002311384184 | lr: 0.02 | momentum: 0.9
* Epoch (13/15) 
train (14/15) loss: 0.7815186440944671 | loss/mean: 0.7815186440944671 | loss/std: 0.15622405184047372 | lr: 0.02 | momentum: 0.9
valid (14/15) loss: 0.851400625705719 | loss/mean: 0.851400625705719 | loss/std: 0.20200558863214887 | lr: 0.02 | momentum: 0.9
* Epoch (14/15) 
train (15/15) loss: 0.900993925333023 | loss/mean: 0.900993925333023 | loss/std: 0.10721551622278938 | lr: 0.02 | momentum: 0.9
valid (15/15) loss: 0.8444585084915162 | loss/mean: 0.8444585084915162 | loss/std: 0.19891131763264958 | lr: 0.02 | momentum: 0.9
* Epoch (15/15) 
Top best models:
logs/trial_185/train.12.pth	0.5728
train (1/15) loss: 1.0078919589519502 | loss/mean: 1.0078919589519502 | loss/std: 0.08323488286303762 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 0.7847793102264404 | loss/mean: 0.7847793102264404 | loss/std: 0.06945112096362839 | lr: 0.02 | momentu

train (13/15) loss: 0.6175835549831391 | loss/mean: 0.6175835549831391 | loss/std: 0.0691020190149878 | lr: 0.02 | momentum: 0.9
valid (13/15) loss: 0.5731709957122803 | loss/mean: 0.5731709957122803 | loss/std: 0.04299002311384184 | lr: 0.02 | momentum: 0.9
* Epoch (13/15) 
train (14/15) loss: 0.7815186440944671 | loss/mean: 0.7815186440944671 | loss/std: 0.15622405184047372 | lr: 0.02 | momentum: 0.9
valid (14/15) loss: 0.851400625705719 | loss/mean: 0.851400625705719 | loss/std: 0.20200558863214887 | lr: 0.02 | momentum: 0.9
* Epoch (14/15) 
train (15/15) loss: 0.900993925333023 | loss/mean: 0.900993925333023 | loss/std: 0.10721551622278938 | lr: 0.02 | momentum: 0.9
valid (15/15) loss: 0.8444585084915162 | loss/mean: 0.8444585084915162 | loss/std: 0.19891131763264958 | lr: 0.02 | momentum: 0.9
* Epoch (15/15) 
Top best models:
logs/trial_187/train.12.pth	0.5728
train (1/15) loss: 1.004421466588974 | loss/mean: 1.004421466588974 | loss/std: 0.07205552945466093 | lr: 0.02 | momentum:

valid (12/15) loss: 0.589236855506897 | loss/mean: 0.589236855506897 | loss/std: 0.03704582614670128 | lr: 0.02 | momentum: 0.9
* Epoch (12/15) 
train (13/15) loss: 0.580253130197525 | loss/mean: 0.580253130197525 | loss/std: 0.029570998245225282 | lr: 0.02 | momentum: 0.9
valid (13/15) loss: 0.5768483996391296 | loss/mean: 0.5768483996391296 | loss/std: 0.0394758110105904 | lr: 0.02 | momentum: 0.9
* Epoch (13/15) 
train (14/15) loss: 0.6112036526203155 | loss/mean: 0.6112036526203155 | loss/std: 0.07128800657209933 | lr: 0.02 | momentum: 0.9
valid (14/15) loss: 0.5870311856269836 | loss/mean: 0.5870311856269836 | loss/std: 0.03790896008097083 | lr: 0.02 | momentum: 0.9
* Epoch (14/15) 
train (15/15) loss: 0.5787550985813139 | loss/mean: 0.5787550985813139 | loss/std: 0.04806372394756 | lr: 0.02 | momentum: 0.9
valid (15/15) loss: 0.575581169128418 | loss/mean: 0.575581169128418 | loss/std: 0.04407019547153713 | lr: 0.02 | momentum: 0.9
* Epoch (15/15) 
Top best models:
logs/trial_189

valid (12/15) loss: 1.251444923877716 | loss/mean: 1.251444923877716 | loss/std: 0.2020305028894651 | lr: 0.02 | momentum: 0.9
* Epoch (12/15) 
train (13/15) loss: 1.201444923877716 | loss/mean: 1.201444923877716 | loss/std: 0.10298575466259888 | lr: 0.02 | momentum: 0.9
valid (13/15) loss: 1.251444923877716 | loss/mean: 1.251444923877716 | loss/std: 0.2020305028894651 | lr: 0.02 | momentum: 0.9
* Epoch (13/15) 
train (14/15) loss: 1.201444923877716 | loss/mean: 1.201444923877716 | loss/std: 0.10298575466259888 | lr: 0.02 | momentum: 0.9
valid (14/15) loss: 1.251444923877716 | loss/mean: 1.251444923877716 | loss/std: 0.2020305028894651 | lr: 0.02 | momentum: 0.9
* Epoch (14/15) 
train (15/15) loss: 1.201444923877716 | loss/mean: 1.201444923877716 | loss/std: 0.10298575466259888 | lr: 0.02 | momentum: 0.9
valid (15/15) loss: 1.251444923877716 | loss/mean: 1.251444923877716 | loss/std: 0.2020305028894651 | lr: 0.02 | momentum: 0.9
* Epoch (15/15) 
Top best models:
logs/trial_191/train.5.

valid (12/15) loss: 0.5874445915222168 | loss/mean: 0.5874445915222168 | loss/std: 0.027084141649976544 | lr: 0.02 | momentum: 0.9
* Epoch (12/15) 
train (13/15) loss: 0.6027984082698822 | loss/mean: 0.6027984082698822 | loss/std: 0.04302648030371633 | lr: 0.02 | momentum: 0.9
valid (13/15) loss: 0.5866904497146607 | loss/mean: 0.5866904497146607 | loss/std: 0.02704639670485597 | lr: 0.02 | momentum: 0.9
* Epoch (13/15) 
train (14/15) loss: 0.6008744120597839 | loss/mean: 0.6008744120597839 | loss/std: 0.0418998197447985 | lr: 0.02 | momentum: 0.9
valid (14/15) loss: 0.5856472134590149 | loss/mean: 0.5856472134590149 | loss/std: 0.02693912921464053 | lr: 0.02 | momentum: 0.9
* Epoch (14/15) 
train (15/15) loss: 0.6002524077892304 | loss/mean: 0.6002524077892304 | loss/std: 0.04433631174074445 | lr: 0.02 | momentum: 0.9
valid (15/15) loss: 0.5853976368904114 | loss/mean: 0.5853976368904114 | loss/std: 0.027008227393606834 | lr: 0.02 | momentum: 0.9
* Epoch (15/15) 
Top best models:
logs

train (12/15) loss: 0.6108015418052674 | loss/mean: 0.6108015418052674 | loss/std: 0.06904828759890864 | lr: 0.02 | momentum: 0.9
valid (12/15) loss: 0.5718968152999878 | loss/mean: 0.5718968152999878 | loss/std: 0.03914441083378243 | lr: 0.02 | momentum: 0.9
* Epoch (12/15) 
train (13/15) loss: 0.6153755068778992 | loss/mean: 0.6153755068778992 | loss/std: 0.07613929256781175 | lr: 0.02 | momentum: 0.9
valid (13/15) loss: 0.5720436215400696 | loss/mean: 0.5720436215400696 | loss/std: 0.037708135424444726 | lr: 0.02 | momentum: 0.9
* Epoch (13/15) 
train (14/15) loss: 0.6160523474216462 | loss/mean: 0.6160523474216462 | loss/std: 0.10879287982098666 | lr: 0.02 | momentum: 0.9
valid (14/15) loss: 0.6608900785446167 | loss/mean: 0.6608900785446167 | loss/std: 0.10386222831995882 | lr: 0.02 | momentum: 0.9
* Epoch (14/15) 
train (15/15) loss: 0.7198710024356842 | loss/mean: 0.7198710024356842 | loss/std: 0.08791889585515282 | lr: 0.02 | momentum: 0.9
valid (15/15) loss: 0.6662110805511474

train (12/15) loss: 0.9323942422866821 | loss/mean: 0.9323942422866821 | loss/std: 0.14828707975840977 | lr: 0.02 | momentum: 0.9
valid (12/15) loss: 0.8841424942016601 | loss/mean: 0.8841424942016601 | loss/std: 0.16683790445528873 | lr: 0.02 | momentum: 0.9
* Epoch (12/15) 
train (13/15) loss: 0.8944852232933045 | loss/mean: 0.8944852232933045 | loss/std: 0.13342796748140892 | lr: 0.02 | momentum: 0.9
valid (13/15) loss: 0.8562671780586243 | loss/mean: 0.8562671780586243 | loss/std: 0.18664802125360413 | lr: 0.02 | momentum: 0.9
* Epoch (13/15) 
train (14/15) loss: 0.8182979881763458 | loss/mean: 0.8182979881763458 | loss/std: 0.11724406817076662 | lr: 0.02 | momentum: 0.9
valid (14/15) loss: 0.8602057218551635 | loss/mean: 0.8602057218551635 | loss/std: 0.2151483253031975 | lr: 0.02 | momentum: 0.9
* Epoch (14/15) 
train (15/15) loss: 0.9314522206783294 | loss/mean: 0.9314522206783294 | loss/std: 0.11720274139637385 | lr: 0.02 | momentum: 0.9
valid (15/15) loss: 0.8685179710388183 |

train (12/15) loss: 0.9323942422866821 | loss/mean: 0.9323942422866821 | loss/std: 0.14828707975840977 | lr: 0.02 | momentum: 0.9
valid (12/15) loss: 0.8841424942016601 | loss/mean: 0.8841424942016601 | loss/std: 0.16683790445528873 | lr: 0.02 | momentum: 0.9
* Epoch (12/15) 
train (13/15) loss: 0.8944852232933045 | loss/mean: 0.8944852232933045 | loss/std: 0.13342796748140892 | lr: 0.02 | momentum: 0.9
valid (13/15) loss: 0.8562671780586243 | loss/mean: 0.8562671780586243 | loss/std: 0.18664802125360413 | lr: 0.02 | momentum: 0.9
* Epoch (13/15) 
train (14/15) loss: 0.8182979881763458 | loss/mean: 0.8182979881763458 | loss/std: 0.11724406817076662 | lr: 0.02 | momentum: 0.9
valid (14/15) loss: 0.8602057218551635 | loss/mean: 0.8602057218551635 | loss/std: 0.2151483253031975 | lr: 0.02 | momentum: 0.9
* Epoch (14/15) 
train (15/15) loss: 0.9314522206783294 | loss/mean: 0.9314522206783294 | loss/std: 0.11720274139637385 | lr: 0.02 | momentum: 0.9
valid (15/15) loss: 0.8685179710388183 |

valid (11/15) loss: 0.6477510213851929 | loss/mean: 0.6477510213851929 | loss/std: 0.08951066169007528 | lr: 0.02 | momentum: 0.9
* Epoch (11/15) 
train (12/15) loss: 0.7117015242576599 | loss/mean: 0.7117015242576599 | loss/std: 0.1226500936855162 | lr: 0.02 | momentum: 0.9
valid (12/15) loss: 0.8032989859580993 | loss/mean: 0.8032989859580993 | loss/std: 0.1619335534531186 | lr: 0.02 | momentum: 0.9
* Epoch (12/15) 
train (13/15) loss: 0.8499319374561309 | loss/mean: 0.8499319374561309 | loss/std: 0.09706332146399958 | lr: 0.02 | momentum: 0.9
valid (13/15) loss: 0.8498019695281982 | loss/mean: 0.8498019695281982 | loss/std: 0.2009342630324869 | lr: 0.02 | momentum: 0.9
* Epoch (13/15) 
train (14/15) loss: 0.8687819659709931 | loss/mean: 0.8687819659709931 | loss/std: 0.11060131289368985 | lr: 0.02 | momentum: 0.9
valid (14/15) loss: 0.8714123845100403 | loss/mean: 0.8714123845100403 | loss/std: 0.17373286787145448 | lr: 0.02 | momentum: 0.9
* Epoch (14/15) 
train (15/15) loss: 0.891

train (11/15) loss: 0.7186296463012696 | loss/mean: 0.7186296463012696 | loss/std: 0.1187540998563771 | lr: 0.02 | momentum: 0.9
valid (11/15) loss: 0.6477510213851929 | loss/mean: 0.6477510213851929 | loss/std: 0.08951066169007528 | lr: 0.02 | momentum: 0.9
* Epoch (11/15) 
train (12/15) loss: 0.7117015242576599 | loss/mean: 0.7117015242576599 | loss/std: 0.1226500936855162 | lr: 0.02 | momentum: 0.9
valid (12/15) loss: 0.8032989859580993 | loss/mean: 0.8032989859580993 | loss/std: 0.1619335534531186 | lr: 0.02 | momentum: 0.9
* Epoch (12/15) 
train (13/15) loss: 0.8499319374561309 | loss/mean: 0.8499319374561309 | loss/std: 0.09706332146399958 | lr: 0.02 | momentum: 0.9
valid (13/15) loss: 0.8498019695281982 | loss/mean: 0.8498019695281982 | loss/std: 0.2009342630324869 | lr: 0.02 | momentum: 0.9
* Epoch (13/15) 
train (14/15) loss: 0.8687819659709931 | loss/mean: 0.8687819659709931 | loss/std: 0.11060131289368985 | lr: 0.02 | momentum: 0.9
valid (14/15) loss: 0.8714123845100403 | lo

train (11/15) loss: 0.9014448702335357 | loss/mean: 0.9014448702335357 | loss/std: 0.1029857517395451 | lr: 0.02 | momentum: 0.9
valid (11/15) loss: 0.8514448642730713 | loss/mean: 0.8514448642730713 | loss/std: 0.20203054503631412 | lr: 0.02 | momentum: 0.9
* Epoch (11/15) 
train (12/15) loss: 0.9014448702335357 | loss/mean: 0.9014448702335357 | loss/std: 0.1029857517395451 | lr: 0.02 | momentum: 0.9
valid (12/15) loss: 0.8514448642730713 | loss/mean: 0.8514448642730713 | loss/std: 0.20203054503631412 | lr: 0.02 | momentum: 0.9
* Epoch (12/15) 
train (13/15) loss: 0.9014448702335357 | loss/mean: 0.9014448702335357 | loss/std: 0.1029857517395451 | lr: 0.02 | momentum: 0.9
valid (13/15) loss: 0.8514448642730713 | loss/mean: 0.8514448642730713 | loss/std: 0.20203054503631412 | lr: 0.02 | momentum: 0.9
* Epoch (13/15) 
train (14/15) loss: 0.9014448702335357 | loss/mean: 0.9014448702335357 | loss/std: 0.1029857517395451 | lr: 0.02 | momentum: 0.9
valid (14/15) loss: 0.8514448642730713 | lo

train (11/15) loss: 0.9014448702335357 | loss/mean: 0.9014448702335357 | loss/std: 0.1029857517395451 | lr: 0.02 | momentum: 0.9
valid (11/15) loss: 0.8514448642730713 | loss/mean: 0.8514448642730713 | loss/std: 0.20203054503631412 | lr: 0.02 | momentum: 0.9
* Epoch (11/15) 
train (12/15) loss: 0.9014448702335357 | loss/mean: 0.9014448702335357 | loss/std: 0.1029857517395451 | lr: 0.02 | momentum: 0.9
valid (12/15) loss: 0.8514448642730713 | loss/mean: 0.8514448642730713 | loss/std: 0.20203054503631412 | lr: 0.02 | momentum: 0.9
* Epoch (12/15) 
train (13/15) loss: 0.9014448702335357 | loss/mean: 0.9014448702335357 | loss/std: 0.1029857517395451 | lr: 0.02 | momentum: 0.9
valid (13/15) loss: 0.8514448642730713 | loss/mean: 0.8514448642730713 | loss/std: 0.20203054503631412 | lr: 0.02 | momentum: 0.9
* Epoch (13/15) 
train (14/15) loss: 0.9014448702335357 | loss/mean: 0.9014448702335357 | loss/std: 0.1029857517395451 | lr: 0.02 | momentum: 0.9
valid (14/15) loss: 0.8514448642730713 | lo

train (11/15) loss: 0.6911276757717133 | loss/mean: 0.6911276757717133 | loss/std: 0.09305019185518623 | lr: 0.02 | momentum: 0.9
valid (11/15) loss: 0.7320736646652222 | loss/mean: 0.7320736646652222 | loss/std: 0.11804866525515047 | lr: 0.02 | momentum: 0.9
* Epoch (11/15) 
train (12/15) loss: 0.6613633453845978 | loss/mean: 0.6613633453845978 | loss/std: 0.1096002113533534 | lr: 0.02 | momentum: 0.9
valid (12/15) loss: 0.6525016307830811 | loss/mean: 0.6525016307830811 | loss/std: 0.1081775237557346 | lr: 0.02 | momentum: 0.9
* Epoch (12/15) 
train (13/15) loss: 0.6405651211738587 | loss/mean: 0.6405651211738587 | loss/std: 0.10223107918854422 | lr: 0.02 | momentum: 0.9
valid (13/15) loss: 0.5730375289916992 | loss/mean: 0.5730375289916992 | loss/std: 0.03922395498297058 | lr: 0.02 | momentum: 0.9
* Epoch (13/15) 
train (14/15) loss: 0.6284816205501557 | loss/mean: 0.6284816205501557 | loss/std: 0.08260618129689416 | lr: 0.02 | momentum: 0.9
valid (14/15) loss: 0.6417853236198425 | 

train (11/15) loss: 0.6911276757717133 | loss/mean: 0.6911276757717133 | loss/std: 0.09305019185518623 | lr: 0.02 | momentum: 0.9
valid (11/15) loss: 0.7320736646652222 | loss/mean: 0.7320736646652222 | loss/std: 0.11804866525515047 | lr: 0.02 | momentum: 0.9
* Epoch (11/15) 
train (12/15) loss: 0.6613633453845978 | loss/mean: 0.6613633453845978 | loss/std: 0.1096002113533534 | lr: 0.02 | momentum: 0.9
valid (12/15) loss: 0.6525016307830811 | loss/mean: 0.6525016307830811 | loss/std: 0.1081775237557346 | lr: 0.02 | momentum: 0.9
* Epoch (12/15) 
train (13/15) loss: 0.6405651211738587 | loss/mean: 0.6405651211738587 | loss/std: 0.10223107918854422 | lr: 0.02 | momentum: 0.9
valid (13/15) loss: 0.5730375289916992 | loss/mean: 0.5730375289916992 | loss/std: 0.03922395498297058 | lr: 0.02 | momentum: 0.9
* Epoch (13/15) 
train (14/15) loss: 0.6284816205501557 | loss/mean: 0.6284816205501557 | loss/std: 0.08260618129689416 | lr: 0.02 | momentum: 0.9
valid (14/15) loss: 0.6417853236198425 | 

valid (10/15) loss: 0.5933231234550476 | loss/mean: 0.5933231234550476 | loss/std: 0.03086840763205576 | lr: 0.02 | momentum: 0.9
* Epoch (10/15) 
train (11/15) loss: 0.6071759641170502 | loss/mean: 0.6071759641170502 | loss/std: 0.0422825808047517 | lr: 0.02 | momentum: 0.9
valid (11/15) loss: 0.5915477871894836 | loss/mean: 0.5915477871894836 | loss/std: 0.030587689389707854 | lr: 0.02 | momentum: 0.9
* Epoch (11/15) 
train (12/15) loss: 0.6052694141864777 | loss/mean: 0.6052694141864777 | loss/std: 0.043196061528828274 | lr: 0.02 | momentum: 0.9
valid (12/15) loss: 0.5903735756874084 | loss/mean: 0.5903735756874084 | loss/std: 0.030323060785096834 | lr: 0.02 | momentum: 0.9
* Epoch (12/15) 
train (13/15) loss: 0.603071254491806 | loss/mean: 0.603071254491806 | loss/std: 0.04264229018895157 | lr: 0.02 | momentum: 0.9
valid (13/15) loss: 0.5890695810317993 | loss/mean: 0.5890695810317993 | loss/std: 0.03022800116866041 | lr: 0.02 | momentum: 0.9
* Epoch (13/15) 
train (14/15) loss: 0.

train (10/15) loss: 0.6169148504734039 | loss/mean: 0.6169148504734039 | loss/std: 0.06396129247749102 | lr: 0.02 | momentum: 0.9
valid (10/15) loss: 0.5916643977165222 | loss/mean: 0.5916643977165222 | loss/std: 0.036554267051914516 | lr: 0.02 | momentum: 0.9
* Epoch (10/15) 
train (11/15) loss: 0.5991400837898254 | loss/mean: 0.5991400837898254 | loss/std: 0.03837761165297083 | lr: 0.02 | momentum: 0.9
valid (11/15) loss: 0.5819789886474609 | loss/mean: 0.5819789886474609 | loss/std: 0.0399203125731546 | lr: 0.02 | momentum: 0.9
* Epoch (11/15) 
train (12/15) loss: 0.628233677148819 | loss/mean: 0.628233677148819 | loss/std: 0.06750287128793131 | lr: 0.02 | momentum: 0.9
valid (12/15) loss: 0.593654990196228 | loss/mean: 0.593654990196228 | loss/std: 0.03989894695546094 | lr: 0.02 | momentum: 0.9
* Epoch (12/15) 
train (13/15) loss: 0.5921411395072936 | loss/mean: 0.5921411395072936 | loss/std: 0.029702846575903358 | lr: 0.02 | momentum: 0.9
valid (13/15) loss: 0.5794288039207458 | l

valid (9/15) loss: 0.5759727716445923 | loss/mean: 0.5759727716445923 | loss/std: 0.039932975144632496 | lr: 0.02 | momentum: 0.9
* Epoch (9/15) 
train (10/15) loss: 0.6134540975093842 | loss/mean: 0.6134540975093842 | loss/std: 0.07248780305423731 | lr: 0.02 | momentum: 0.9
valid (10/15) loss: 0.5879441380500794 | loss/mean: 0.5879441380500794 | loss/std: 0.038818614472750916 | lr: 0.02 | momentum: 0.9
* Epoch (10/15) 
train (11/15) loss: 0.585090607404709 | loss/mean: 0.585090607404709 | loss/std: 0.05786129793642875 | lr: 0.02 | momentum: 0.9
valid (11/15) loss: 0.5775721549987793 | loss/mean: 0.5775721549987793 | loss/std: 0.04931570447405215 | lr: 0.02 | momentum: 0.9
* Epoch (11/15) 
train (12/15) loss: 0.5925187826156616 | loss/mean: 0.5925187826156616 | loss/std: 0.06407923455460238 | lr: 0.02 | momentum: 0.9
valid (12/15) loss: 0.5861128330230713 | loss/mean: 0.5861128330230713 | loss/std: 0.03889977884157837 | lr: 0.02 | momentum: 0.9
* Epoch (12/15) 
train (13/15) loss: 0.61

train (9/15) loss: 0.8332014441490174 | loss/mean: 0.8332014441490174 | loss/std: 0.060155578617527385 | lr: 0.02 | momentum: 0.9
valid (9/15) loss: 0.8038842916488648 | loss/mean: 0.8038842916488648 | loss/std: 0.07507414164523553 | lr: 0.02 | momentum: 0.9
* Epoch (9/15) 
train (10/15) loss: 0.8332644879817963 | loss/mean: 0.8332644879817963 | loss/std: 0.060372091608930104 | lr: 0.02 | momentum: 0.9
valid (10/15) loss: 0.8039766311645508 | loss/mean: 0.8039766311645508 | loss/std: 0.07484413889096088 | lr: 0.02 | momentum: 0.9
* Epoch (10/15) 
train (11/15) loss: 0.8331350922584533 | loss/mean: 0.8331350922584533 | loss/std: 0.06056323210151346 | lr: 0.02 | momentum: 0.9
valid (11/15) loss: 0.8039574146270752 | loss/mean: 0.8039574146270752 | loss/std: 0.07483774557422407 | lr: 0.02 | momentum: 0.9
* Epoch (11/15) 
train (12/15) loss: 0.8330639064311981 | loss/mean: 0.8330639064311981 | loss/std: 0.06048137116164096 | lr: 0.02 | momentum: 0.9
valid (12/15) loss: 0.8039016485214233 |

valid (8/15) loss: 0.8039021492004395 | loss/mean: 0.8039021492004395 | loss/std: 0.07511564006951466 | lr: 0.02 | momentum: 0.9
* Epoch (8/15) 
train (9/15) loss: 0.8332014441490174 | loss/mean: 0.8332014441490174 | loss/std: 0.060155578617527385 | lr: 0.02 | momentum: 0.9
valid (9/15) loss: 0.8038842916488648 | loss/mean: 0.8038842916488648 | loss/std: 0.07507414164523553 | lr: 0.02 | momentum: 0.9
* Epoch (9/15) 
train (10/15) loss: 0.8332644879817963 | loss/mean: 0.8332644879817963 | loss/std: 0.060372091608930104 | lr: 0.02 | momentum: 0.9
valid (10/15) loss: 0.8039766311645508 | loss/mean: 0.8039766311645508 | loss/std: 0.07484413889096088 | lr: 0.02 | momentum: 0.9
* Epoch (10/15) 
train (11/15) loss: 0.8331350922584533 | loss/mean: 0.8331350922584533 | loss/std: 0.06056323210151346 | lr: 0.02 | momentum: 0.9
valid (11/15) loss: 0.8039574146270752 | loss/mean: 0.8039574146270752 | loss/std: 0.07483774557422407 | lr: 0.02 | momentum: 0.9
* Epoch (11/15) 
train (12/15) loss: 0.833

train (8/15) loss: 0.676740539073944 | loss/mean: 0.676740539073944 | loss/std: 0.09801628460469912 | lr: 0.02 | momentum: 0.9
valid (8/15) loss: 0.5753503680229187 | loss/mean: 0.5753503680229187 | loss/std: 0.03726280673613505 | lr: 0.02 | momentum: 0.9
* Epoch (8/15) 
train (9/15) loss: 0.6312633216381073 | loss/mean: 0.6312633216381073 | loss/std: 0.0784283334545865 | lr: 0.02 | momentum: 0.9
valid (9/15) loss: 0.5742327570915222 | loss/mean: 0.5742327570915222 | loss/std: 0.03693795087353833 | lr: 0.02 | momentum: 0.9
* Epoch (9/15) 
train (10/15) loss: 0.7733730971813202 | loss/mean: 0.7733730971813202 | loss/std: 0.1404103601338587 | lr: 0.02 | momentum: 0.9
valid (10/15) loss: 0.8465590953826905 | loss/mean: 0.8465590953826905 | loss/std: 0.19973536980301898 | lr: 0.02 | momentum: 0.9
* Epoch (10/15) 
train (11/15) loss: 0.7982619941234589 | loss/mean: 0.7982619941234589 | loss/std: 0.1176036211779876 | lr: 0.02 | momentum: 0.9
valid (11/15) loss: 0.5744957804679871 | loss/mean

train (8/15) loss: 0.676740539073944 | loss/mean: 0.676740539073944 | loss/std: 0.09801628460469912 | lr: 0.02 | momentum: 0.9
valid (8/15) loss: 0.5753503680229187 | loss/mean: 0.5753503680229187 | loss/std: 0.03726280673613505 | lr: 0.02 | momentum: 0.9
* Epoch (8/15) 
train (9/15) loss: 0.6312633216381073 | loss/mean: 0.6312633216381073 | loss/std: 0.0784283334545865 | lr: 0.02 | momentum: 0.9
valid (9/15) loss: 0.5742327570915222 | loss/mean: 0.5742327570915222 | loss/std: 0.03693795087353833 | lr: 0.02 | momentum: 0.9
* Epoch (9/15) 
train (10/15) loss: 0.7733730971813202 | loss/mean: 0.7733730971813202 | loss/std: 0.1404103601338587 | lr: 0.02 | momentum: 0.9
valid (10/15) loss: 0.8465590953826905 | loss/mean: 0.8465590953826905 | loss/std: 0.19973536980301898 | lr: 0.02 | momentum: 0.9
* Epoch (10/15) 
train (11/15) loss: 0.7982619941234589 | loss/mean: 0.7982619941234589 | loss/std: 0.1176036211779876 | lr: 0.02 | momentum: 0.9
valid (11/15) loss: 0.5744957804679871 | loss/mean

* Epoch (7/15) 
train (8/15) loss: 0.676740539073944 | loss/mean: 0.676740539073944 | loss/std: 0.09801628460469912 | lr: 0.02 | momentum: 0.9
valid (8/15) loss: 0.5753503680229187 | loss/mean: 0.5753503680229187 | loss/std: 0.03726280673613505 | lr: 0.02 | momentum: 0.9
* Epoch (8/15) 
train (9/15) loss: 0.6312633216381073 | loss/mean: 0.6312633216381073 | loss/std: 0.0784283334545865 | lr: 0.02 | momentum: 0.9
valid (9/15) loss: 0.5742327570915222 | loss/mean: 0.5742327570915222 | loss/std: 0.03693795087353833 | lr: 0.02 | momentum: 0.9
* Epoch (9/15) 
train (10/15) loss: 0.7733730971813202 | loss/mean: 0.7733730971813202 | loss/std: 0.1404103601338587 | lr: 0.02 | momentum: 0.9
valid (10/15) loss: 0.8465590953826905 | loss/mean: 0.8465590953826905 | loss/std: 0.19973536980301898 | lr: 0.02 | momentum: 0.9
* Epoch (10/15) 
train (11/15) loss: 0.7982619941234589 | loss/mean: 0.7982619941234589 | loss/std: 0.1176036211779876 | lr: 0.02 | momentum: 0.9
valid (11/15) loss: 0.574495780467

train (8/15) loss: 0.7438053786754608 | loss/mean: 0.7438053786754608 | loss/std: 0.09976018806621936 | lr: 0.02 | momentum: 0.9
valid (8/15) loss: 0.7478484511375427 | loss/mean: 0.7478484511375427 | loss/std: 0.1367126656037683 | lr: 0.02 | momentum: 0.9
* Epoch (8/15) 
train (9/15) loss: 0.7137681007385255 | loss/mean: 0.7137681007385255 | loss/std: 0.09514429142558034 | lr: 0.02 | momentum: 0.9
valid (9/15) loss: 0.7712421894073487 | loss/mean: 0.7712421894073487 | loss/std: 0.15071220290929743 | lr: 0.02 | momentum: 0.9
* Epoch (9/15) 
train (10/15) loss: 0.7128824591636658 | loss/mean: 0.7128824591636658 | loss/std: 0.10876747694353836 | lr: 0.02 | momentum: 0.9
valid (10/15) loss: 0.7178488135337829 | loss/mean: 0.7178488135337829 | loss/std: 0.12005718489151153 | lr: 0.02 | momentum: 0.9
* Epoch (10/15) 
train (11/15) loss: 0.8161893427371979 | loss/mean: 0.8161893427371979 | loss/std: 0.1773317026016915 | lr: 0.02 | momentum: 0.9
valid (11/15) loss: 0.871354877948761 | loss/me

train (8/15) loss: 0.7438053786754608 | loss/mean: 0.7438053786754608 | loss/std: 0.09976018806621936 | lr: 0.02 | momentum: 0.9
valid (8/15) loss: 0.7478484511375427 | loss/mean: 0.7478484511375427 | loss/std: 0.1367126656037683 | lr: 0.02 | momentum: 0.9
* Epoch (8/15) 
train (9/15) loss: 0.7137681007385255 | loss/mean: 0.7137681007385255 | loss/std: 0.09514429142558034 | lr: 0.02 | momentum: 0.9
valid (9/15) loss: 0.7712421894073487 | loss/mean: 0.7712421894073487 | loss/std: 0.15071220290929743 | lr: 0.02 | momentum: 0.9
* Epoch (9/15) 
train (10/15) loss: 0.7128824591636658 | loss/mean: 0.7128824591636658 | loss/std: 0.10876747694353836 | lr: 0.02 | momentum: 0.9
valid (10/15) loss: 0.7178488135337829 | loss/mean: 0.7178488135337829 | loss/std: 0.12005718489151153 | lr: 0.02 | momentum: 0.9
* Epoch (10/15) 
train (11/15) loss: 0.8161893427371979 | loss/mean: 0.8161893427371979 | loss/std: 0.1773317026016915 | lr: 0.02 | momentum: 0.9
valid (11/15) loss: 0.871354877948761 | loss/me

train (8/15) loss: 0.6077308952808379 | loss/mean: 0.6077308952808379 | loss/std: 0.057446237503114235 | lr: 0.02 | momentum: 0.9
valid (8/15) loss: 0.5809834718704223 | loss/mean: 0.5809834718704223 | loss/std: 0.03928225708778215 | lr: 0.02 | momentum: 0.9
* Epoch (8/15) 
train (9/15) loss: 0.6286871135234833 | loss/mean: 0.6286871135234833 | loss/std: 0.07994827901442728 | lr: 0.02 | momentum: 0.9
valid (9/15) loss: 0.5720472574234009 | loss/mean: 0.5720472574234009 | loss/std: 0.04042113213684293 | lr: 0.02 | momentum: 0.9
* Epoch (9/15) 
train (10/15) loss: 0.7711089968681335 | loss/mean: 0.7711089968681335 | loss/std: 0.1408269578215591 | lr: 0.02 | momentum: 0.9
valid (10/15) loss: 0.8513018131256104 | loss/mean: 0.8513018131256104 | loss/std: 0.20194398276061482 | lr: 0.02 | momentum: 0.9
* Epoch (10/15) 
train (11/15) loss: 0.8881483554840087 | loss/mean: 0.8881483554840087 | loss/std: 0.09135071866041541 | lr: 0.02 | momentum: 0.9
valid (11/15) loss: 0.8312966823577881 | loss

train (8/15) loss: 0.6077308952808379 | loss/mean: 0.6077308952808379 | loss/std: 0.057446237503114235 | lr: 0.02 | momentum: 0.9
valid (8/15) loss: 0.5809834718704223 | loss/mean: 0.5809834718704223 | loss/std: 0.03928225708778215 | lr: 0.02 | momentum: 0.9
* Epoch (8/15) 
train (9/15) loss: 0.6286871135234833 | loss/mean: 0.6286871135234833 | loss/std: 0.07994827901442728 | lr: 0.02 | momentum: 0.9
valid (9/15) loss: 0.5720472574234009 | loss/mean: 0.5720472574234009 | loss/std: 0.04042113213684293 | lr: 0.02 | momentum: 0.9
* Epoch (9/15) 
train (10/15) loss: 0.7711089968681335 | loss/mean: 0.7711089968681335 | loss/std: 0.1408269578215591 | lr: 0.02 | momentum: 0.9
valid (10/15) loss: 0.8513018131256104 | loss/mean: 0.8513018131256104 | loss/std: 0.20194398276061482 | lr: 0.02 | momentum: 0.9
* Epoch (10/15) 
train (11/15) loss: 0.8881483554840087 | loss/mean: 0.8881483554840087 | loss/std: 0.09135071866041541 | lr: 0.02 | momentum: 0.9
valid (11/15) loss: 0.8312966823577881 | loss

train (8/15) loss: 1.0114450097084045 | loss/mean: 1.0114450097084045 | loss/std: 0.12060459554003618 | lr: 0.02 | momentum: 0.9
valid (8/15) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (8/15) 
train (9/15) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (9/15) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (9/15) 
train (10/15) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (10/15) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (10/15) 
train (11/15) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (11/15) loss: 0.871444833278656 | loss/mean: 0.8714

* Epoch (7/15) 
train (8/15) loss: 1.0114450097084045 | loss/mean: 1.0114450097084045 | loss/std: 0.12060459554003618 | lr: 0.02 | momentum: 0.9
valid (8/15) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (8/15) 
train (9/15) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (9/15) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (9/15) 
train (10/15) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (10/15) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (10/15) 
train (11/15) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (11/15) loss: 0.871444833278656 | l

train (8/15) loss: 1.0114450097084045 | loss/mean: 1.0114450097084045 | loss/std: 0.12060459554003618 | lr: 0.02 | momentum: 0.9
valid (8/15) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (8/15) 
train (9/15) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (9/15) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (9/15) 
train (10/15) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (10/15) loss: 0.871444833278656 | loss/mean: 0.871444833278656 | loss/std: 0.17379325098751455 | lr: 0.02 | momentum: 0.9
* Epoch (10/15) 
train (11/15) loss: 0.891444879770279 | loss/mean: 0.891444879770279 | loss/std: 0.13632999025949763 | lr: 0.02 | momentum: 0.9
valid (11/15) loss: 0.871444833278656 | loss/mean: 0.8714

train (8/15) loss: 0.6017786026000976 | loss/mean: 0.6017786026000976 | loss/std: 0.06490247458649644 | lr: 0.02 | momentum: 0.9
valid (8/15) loss: 0.5947011351585388 | loss/mean: 0.5947011351585388 | loss/std: 0.07825321984346112 | lr: 0.02 | momentum: 0.9
* Epoch (8/15) 
train (9/15) loss: 0.6034671902656554 | loss/mean: 0.6034671902656554 | loss/std: 0.06012963651784954 | lr: 0.02 | momentum: 0.9
valid (9/15) loss: 0.6251236319541931 | loss/mean: 0.6251236319541931 | loss/std: 0.05917028682109448 | lr: 0.02 | momentum: 0.9
* Epoch (9/15) 
train (10/15) loss: 0.680581557750702 | loss/mean: 0.680581557750702 | loss/std: 0.0907150450053418 | lr: 0.02 | momentum: 0.9
valid (10/15) loss: 0.622597050666809 | loss/mean: 0.622597050666809 | loss/std: 0.05687295036793542 | lr: 0.02 | momentum: 0.9
* Epoch (10/15) 
train (11/15) loss: 0.5958465933799744 | loss/mean: 0.5958465933799744 | loss/std: 0.053738830691764716 | lr: 0.02 | momentum: 0.9
valid (11/15) loss: 0.6079861402511597 | loss/mea

train (8/15) loss: 0.6017786026000976 | loss/mean: 0.6017786026000976 | loss/std: 0.06490247458649644 | lr: 0.02 | momentum: 0.9
valid (8/15) loss: 0.5947011351585388 | loss/mean: 0.5947011351585388 | loss/std: 0.07825321984346112 | lr: 0.02 | momentum: 0.9
* Epoch (8/15) 
train (9/15) loss: 0.6034671902656554 | loss/mean: 0.6034671902656554 | loss/std: 0.06012963651784954 | lr: 0.02 | momentum: 0.9
valid (9/15) loss: 0.6251236319541931 | loss/mean: 0.6251236319541931 | loss/std: 0.05917028682109448 | lr: 0.02 | momentum: 0.9
* Epoch (9/15) 
train (10/15) loss: 0.680581557750702 | loss/mean: 0.680581557750702 | loss/std: 0.0907150450053418 | lr: 0.02 | momentum: 0.9
valid (10/15) loss: 0.622597050666809 | loss/mean: 0.622597050666809 | loss/std: 0.05687295036793542 | lr: 0.02 | momentum: 0.9
* Epoch (10/15) 
train (11/15) loss: 0.5958465933799744 | loss/mean: 0.5958465933799744 | loss/std: 0.053738830691764716 | lr: 0.02 | momentum: 0.9
valid (11/15) loss: 0.6079861402511597 | loss/mea

valid (7/15) loss: 0.5830106616020203 | loss/mean: 0.5830106616020203 | loss/std: 0.030189483703164908 | lr: 0.02 | momentum: 0.9
* Epoch (7/15) 
train (8/15) loss: 0.5943013727664948 | loss/mean: 0.5943013727664948 | loss/std: 0.055334389390438735 | lr: 0.02 | momentum: 0.9
valid (8/15) loss: 0.5821499228477478 | loss/mean: 0.5821499228477478 | loss/std: 0.031071345843354536 | lr: 0.02 | momentum: 0.9
* Epoch (8/15) 
train (9/15) loss: 0.6034344553947448 | loss/mean: 0.6034344553947448 | loss/std: 0.05098763893402918 | lr: 0.02 | momentum: 0.9
valid (9/15) loss: 0.5789595603942871 | loss/mean: 0.5789595603942871 | loss/std: 0.03430014479044456 | lr: 0.02 | momentum: 0.9
* Epoch (9/15) 
train (10/15) loss: 0.6211256444454194 | loss/mean: 0.6211256444454194 | loss/std: 0.07359354783775397 | lr: 0.02 | momentum: 0.9
valid (10/15) loss: 0.5885010480880737 | loss/mean: 0.5885010480880737 | loss/std: 0.03636925736920821 | lr: 0.02 | momentum: 0.9
* Epoch (10/15) 
train (11/15) loss: 0.58618

train (7/15) loss: 0.7479045450687409 | loss/mean: 0.7479045450687409 | loss/std: 0.14752429326526117 | lr: 0.02 | momentum: 0.9
valid (7/15) loss: 0.8470761775970459 | loss/mean: 0.8470761775970459 | loss/std: 0.19918042335593178 | lr: 0.02 | momentum: 0.9
* Epoch (7/15) 
train (8/15) loss: 0.8551781058311462 | loss/mean: 0.8551781058311462 | loss/std: 0.07790194522112513 | lr: 0.02 | momentum: 0.9
valid (8/15) loss: 0.7763758540153504 | loss/mean: 0.7763758540153504 | loss/std: 0.14572802053834663 | lr: 0.02 | momentum: 0.9
* Epoch (8/15) 
train (9/15) loss: 0.6908719122409821 | loss/mean: 0.6908719122409821 | loss/std: 0.05348930425142415 | lr: 0.02 | momentum: 0.9
valid (9/15) loss: 0.601416552066803 | loss/mean: 0.601416552066803 | loss/std: 0.04117374805112822 | lr: 0.02 | momentum: 0.9
* Epoch (9/15) 
train (10/15) loss: 0.6627570927143097 | loss/mean: 0.6627570927143097 | loss/std: 0.0843011783759534 | lr: 0.02 | momentum: 0.9
valid (10/15) loss: 0.6318809390068054 | loss/mean:

valid (6/15) loss: 0.6437192678451538 | loss/mean: 0.6437192678451538 | loss/std: 0.08675188545179637 | lr: 0.02 | momentum: 0.9
* Epoch (6/15) 
train (7/15) loss: 0.7479045450687409 | loss/mean: 0.7479045450687409 | loss/std: 0.14752429326526117 | lr: 0.02 | momentum: 0.9
valid (7/15) loss: 0.8470761775970459 | loss/mean: 0.8470761775970459 | loss/std: 0.19918042335593178 | lr: 0.02 | momentum: 0.9
* Epoch (7/15) 
train (8/15) loss: 0.8551781058311462 | loss/mean: 0.8551781058311462 | loss/std: 0.07790194522112513 | lr: 0.02 | momentum: 0.9
valid (8/15) loss: 0.7763758540153504 | loss/mean: 0.7763758540153504 | loss/std: 0.14572802053834663 | lr: 0.02 | momentum: 0.9
* Epoch (8/15) 
train (9/15) loss: 0.6908719122409821 | loss/mean: 0.6908719122409821 | loss/std: 0.05348930425142415 | lr: 0.02 | momentum: 0.9
valid (9/15) loss: 0.601416552066803 | loss/mean: 0.601416552066803 | loss/std: 0.04117374805112822 | lr: 0.02 | momentum: 0.9
* Epoch (9/15) 
train (10/15) loss: 0.6627570927143

* Epoch (5/15) 
train (6/15) loss: 0.6563269913196563 | loss/mean: 0.6563269913196563 | loss/std: 0.10435897953998241 | lr: 0.02 | momentum: 0.9
valid (6/15) loss: 0.6437192678451538 | loss/mean: 0.6437192678451538 | loss/std: 0.08675188545179637 | lr: 0.02 | momentum: 0.9
* Epoch (6/15) 
train (7/15) loss: 0.7479045450687409 | loss/mean: 0.7479045450687409 | loss/std: 0.14752429326526117 | lr: 0.02 | momentum: 0.9
valid (7/15) loss: 0.8470761775970459 | loss/mean: 0.8470761775970459 | loss/std: 0.19918042335593178 | lr: 0.02 | momentum: 0.9
* Epoch (7/15) 
train (8/15) loss: 0.8551781058311462 | loss/mean: 0.8551781058311462 | loss/std: 0.07790194522112513 | lr: 0.02 | momentum: 0.9
valid (8/15) loss: 0.7763758540153504 | loss/mean: 0.7763758540153504 | loss/std: 0.14572802053834663 | lr: 0.02 | momentum: 0.9
* Epoch (8/15) 
train (9/15) loss: 0.6908719122409821 | loss/mean: 0.6908719122409821 | loss/std: 0.05348930425142415 | lr: 0.02 | momentum: 0.9
valid (9/15) loss: 0.601416552066

valid (5/15) loss: 0.5820703506469727 | loss/mean: 0.5820703506469727 | loss/std: 0.05875023607422992 | lr: 0.02 | momentum: 0.9
* Epoch (5/15) 
train (6/15) loss: 0.6905672609806062 | loss/mean: 0.6905672609806062 | loss/std: 0.11187509805144123 | lr: 0.02 | momentum: 0.9
valid (6/15) loss: 0.7661934494972229 | loss/mean: 0.7661934494972229 | loss/std: 0.11322347129731054 | lr: 0.02 | momentum: 0.9
* Epoch (6/15) 
train (7/15) loss: 0.8140464425086975 | loss/mean: 0.8140464425086975 | loss/std: 0.12133497467109108 | lr: 0.02 | momentum: 0.9
valid (7/15) loss: 1.0560330986976623 | loss/mean: 1.0560330986976623 | loss/std: 0.13777954509373255 | lr: 0.02 | momentum: 0.9
* Epoch (7/15) 
train (8/15) loss: 0.9815098702907563 | loss/mean: 0.9815098702907563 | loss/std: 0.14217803053102826 | lr: 0.02 | momentum: 0.9
valid (8/15) loss: 1.1093619823455811 | loss/mean: 1.1093619823455811 | loss/std: 0.18931947856996298 | lr: 0.02 | momentum: 0.9
* Epoch (8/15) 
train (9/15) loss: 0.985838097333

train (5/15) loss: 0.6179767310619354 | loss/mean: 0.6179767310619354 | loss/std: 0.06055960725374828 | lr: 0.02 | momentum: 0.9
valid (5/15) loss: 0.5820703506469727 | loss/mean: 0.5820703506469727 | loss/std: 0.05875023607422992 | lr: 0.02 | momentum: 0.9
* Epoch (5/15) 
train (6/15) loss: 0.6905672609806062 | loss/mean: 0.6905672609806062 | loss/std: 0.11187509805144123 | lr: 0.02 | momentum: 0.9
valid (6/15) loss: 0.7661934494972229 | loss/mean: 0.7661934494972229 | loss/std: 0.11322347129731054 | lr: 0.02 | momentum: 0.9
* Epoch (6/15) 
train (7/15) loss: 0.8140464425086975 | loss/mean: 0.8140464425086975 | loss/std: 0.12133497467109108 | lr: 0.02 | momentum: 0.9
valid (7/15) loss: 1.0560330986976623 | loss/mean: 1.0560330986976623 | loss/std: 0.13777954509373255 | lr: 0.02 | momentum: 0.9
* Epoch (7/15) 
train (8/15) loss: 0.9815098702907563 | loss/mean: 0.9815098702907563 | loss/std: 0.14217803053102826 | lr: 0.02 | momentum: 0.9
valid (8/15) loss: 1.1093619823455811 | loss/mean

valid (4/15) loss: 0.6004872679710388 | loss/mean: 0.6004872679710388 | loss/std: 0.06985934314883885 | lr: 0.02 | momentum: 0.9
* Epoch (4/15) 
train (5/15) loss: 0.6088525354862213 | loss/mean: 0.6088525354862213 | loss/std: 0.06859590086002755 | lr: 0.02 | momentum: 0.9
valid (5/15) loss: 0.6296581029891968 | loss/mean: 0.6296581029891968 | loss/std: 0.08656639539108414 | lr: 0.02 | momentum: 0.9
* Epoch (5/15) 
train (6/15) loss: 0.6577195763587952 | loss/mean: 0.6577195763587952 | loss/std: 0.08612176304893493 | lr: 0.02 | momentum: 0.9
valid (6/15) loss: 0.5804675817489624 | loss/mean: 0.5804675817489624 | loss/std: 0.032123770387433616 | lr: 0.02 | momentum: 0.9
* Epoch (6/15) 
train (7/15) loss: 0.6769543945789338 | loss/mean: 0.6769543945789338 | loss/std: 0.10514818930270198 | lr: 0.02 | momentum: 0.9
valid (7/15) loss: 0.6054590940475464 | loss/mean: 0.6054590940475464 | loss/std: 0.06331290253829333 | lr: 0.02 | momentum: 0.9
* Epoch (7/15) 
train (8/15) loss: 0.61869751214

train (4/15) loss: 0.6759983420372009 | loss/mean: 0.6759983420372009 | loss/std: 0.10334082168226748 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.59028480052948 | loss/mean: 0.59028480052948 | loss/std: 0.04944108703474784 | lr: 0.02 | momentum: 0.9
* Epoch (4/15) 
train (5/15) loss: 0.6158824443817138 | loss/mean: 0.6158824443817138 | loss/std: 0.06001660669900379 | lr: 0.02 | momentum: 0.9
valid (5/15) loss: 0.5637867331504822 | loss/mean: 0.5637867331504822 | loss/std: 0.016875383522763264 | lr: 0.02 | momentum: 0.9
* Epoch (5/15) 
train (6/15) loss: 0.6597585082054138 | loss/mean: 0.6597585082054138 | loss/std: 0.09639027210586945 | lr: 0.02 | momentum: 0.9
valid (6/15) loss: 0.7862536072731018 | loss/mean: 0.7862536072731018 | loss/std: 0.1055057049923584 | lr: 0.02 | momentum: 0.9
* Epoch (6/15) 
train (7/15) loss: 0.7102965176105498 | loss/mean: 0.7102965176105498 | loss/std: 0.10347507173477417 | lr: 0.02 | momentum: 0.9
valid (7/15) loss: 0.577040696144104 | loss/mean: 0.5

train (4/15) loss: 0.6759983420372009 | loss/mean: 0.6759983420372009 | loss/std: 0.10334082168226748 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.59028480052948 | loss/mean: 0.59028480052948 | loss/std: 0.04944108703474784 | lr: 0.02 | momentum: 0.9
* Epoch (4/15) 
train (5/15) loss: 0.6158824443817138 | loss/mean: 0.6158824443817138 | loss/std: 0.06001660669900379 | lr: 0.02 | momentum: 0.9
valid (5/15) loss: 0.5637867331504822 | loss/mean: 0.5637867331504822 | loss/std: 0.016875383522763264 | lr: 0.02 | momentum: 0.9
* Epoch (5/15) 
train (6/15) loss: 0.6597585082054138 | loss/mean: 0.6597585082054138 | loss/std: 0.09639027210586945 | lr: 0.02 | momentum: 0.9
valid (6/15) loss: 0.7862536072731018 | loss/mean: 0.7862536072731018 | loss/std: 0.1055057049923584 | lr: 0.02 | momentum: 0.9
* Epoch (6/15) 
train (7/15) loss: 0.7102965176105498 | loss/mean: 0.7102965176105498 | loss/std: 0.10347507173477417 | lr: 0.02 | momentum: 0.9
valid (7/15) loss: 0.577040696144104 | loss/mean: 0.5

valid (3/15) loss: 0.5828028321266174 | loss/mean: 0.5828028321266174 | loss/std: 0.024883753932782516 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.655822479724884 | loss/mean: 0.655822479724884 | loss/std: 0.1005109308078284 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.7095723986625672 | loss/mean: 0.7095723986625672 | loss/std: 0.09688407391516682 | lr: 0.02 | momentum: 0.9
* Epoch (4/15) 
train (5/15) loss: 0.6670328855514526 | loss/mean: 0.6670328855514526 | loss/std: 0.11849429931238534 | lr: 0.02 | momentum: 0.9
valid (5/15) loss: 0.5759272933006286 | loss/mean: 0.5759272933006286 | loss/std: 0.02968399711174123 | lr: 0.02 | momentum: 0.9
* Epoch (5/15) 
train (6/15) loss: 0.6371068537235259 | loss/mean: 0.6371068537235259 | loss/std: 0.08692573110643312 | lr: 0.02 | momentum: 0.9
valid (6/15) loss: 0.5938013911247253 | loss/mean: 0.5938013911247253 | loss/std: 0.04939031618596771 | lr: 0.02 | momentum: 0.9
* Epoch (6/15) 
train (7/15) loss: 0.67290493249893

train (3/15) loss: 0.6334851801395416 | loss/mean: 0.6334851801395416 | loss/std: 0.056821854780856815 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.5940884590148926 | loss/mean: 0.5940884590148926 | loss/std: 0.07323972497952463 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.7074181139469146 | loss/mean: 0.7074181139469146 | loss/std: 0.11129237925471225 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.7466418385505676 | loss/mean: 0.7466418385505676 | loss/std: 0.13016071130336918 | lr: 0.02 | momentum: 0.9
* Epoch (4/15) 
train (5/15) loss: 0.6633248269557953 | loss/mean: 0.6633248269557953 | loss/std: 0.11795475069008178 | lr: 0.02 | momentum: 0.9
valid (5/15) loss: 0.7350261211395264 | loss/mean: 0.7350261211395264 | loss/std: 0.125398626213955 | lr: 0.02 | momentum: 0.9
* Epoch (5/15) 
train (6/15) loss: 0.6019038498401642 | loss/mean: 0.6019038498401642 | loss/std: 0.0585937323916131 | lr: 0.02 | momentum: 0.9
valid (6/15) loss: 0.5822221875190735 | loss/mean: 

valid (2/15) loss: 0.6825080394744873 | loss/mean: 0.6825080394744873 | loss/std: 0.09643380637946428 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.6334851801395416 | loss/mean: 0.6334851801395416 | loss/std: 0.056821854780856815 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.5940884590148926 | loss/mean: 0.5940884590148926 | loss/std: 0.07323972497952463 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.7074181139469146 | loss/mean: 0.7074181139469146 | loss/std: 0.11129237925471225 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.7466418385505676 | loss/mean: 0.7466418385505676 | loss/std: 0.13016071130336918 | lr: 0.02 | momentum: 0.9
* Epoch (4/15) 
train (5/15) loss: 0.6633248269557953 | loss/mean: 0.6633248269557953 | loss/std: 0.11795475069008178 | lr: 0.02 | momentum: 0.9
valid (5/15) loss: 0.7350261211395264 | loss/mean: 0.7350261211395264 | loss/std: 0.125398626213955 | lr: 0.02 | momentum: 0.9
* Epoch (5/15) 
train (6/15) loss: 0.6019038498401

* Epoch (1/15) 
train (2/15) loss: 0.7627966761589051 | loss/mean: 0.7627966761589051 | loss/std: 0.10677571968114652 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.8871170997619628 | loss/mean: 0.8871170997619628 | loss/std: 0.17142644326464423 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.8072991013526916 | loss/mean: 0.8072991013526916 | loss/std: 0.11269144540824691 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.8392840981483459 | loss/mean: 0.8392840981483459 | loss/std: 0.18999865918904751 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.7236482560634613 | loss/mean: 0.7236482560634613 | loss/std: 0.11819298194505007 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.5879395365715027 | loss/mean: 0.5879395365715027 | loss/std: 0.06379350477716185 | lr: 0.02 | momentum: 0.9
* Epoch (4/15) 
train (5/15) loss: 0.6143955707550048 | loss/mean: 0.6143955707550048 | loss/std: 0.07217049494251647 | lr: 0.02 | momentum: 0.9
valid (5/15) loss: 0.596790826320

valid (1/15) loss: 0.7754140615463256 | loss/mean: 0.7754140615463256 | loss/std: 0.09302479057660218 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.7627966761589051 | loss/mean: 0.7627966761589051 | loss/std: 0.10677571968114652 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.8871170997619628 | loss/mean: 0.8871170997619628 | loss/std: 0.17142644326464423 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.8072991013526916 | loss/mean: 0.8072991013526916 | loss/std: 0.11269144540824691 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.8392840981483459 | loss/mean: 0.8392840981483459 | loss/std: 0.18999865918904751 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.7236482560634613 | loss/mean: 0.7236482560634613 | loss/std: 0.11819298194505007 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.5879395365715027 | loss/mean: 0.5879395365715027 | loss/std: 0.06379350477716185 | lr: 0.02 | momentum: 0.9
* Epoch (4/15) 
train (5/15) loss: 0.614395570755

train (1/15) loss: 1.0471409380435945 | loss/mean: 1.0471409380435945 | loss/std: 0.05192777573510364 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 0.8624674558639527 | loss/mean: 0.8624674558639527 | loss/std: 0.08575058829531838 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.8236027300357818 | loss/mean: 0.8236027300357818 | loss/std: 0.06828478937887825 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.7216997265815734 | loss/mean: 0.7216997265815734 | loss/std: 0.05338066690574835 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.7305196464061736 | loss/mean: 0.7305196464061736 | loss/std: 0.060026137911298676 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.6293381929397583 | loss/mean: 0.6293381929397583 | loss/std: 0.04778912744950747 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.634178912639618 | loss/mean: 0.634178912639618 | loss/std: 0.06384283250323483 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.6484604001045227 | loss/mean:

train (1/15) loss: 1.0471409380435945 | loss/mean: 1.0471409380435945 | loss/std: 0.05192777573510364 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 0.8624674558639527 | loss/mean: 0.8624674558639527 | loss/std: 0.08575058829531838 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.8236027300357818 | loss/mean: 0.8236027300357818 | loss/std: 0.06828478937887825 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.7216997265815734 | loss/mean: 0.7216997265815734 | loss/std: 0.05338066690574835 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.7305196464061736 | loss/mean: 0.7305196464061736 | loss/std: 0.060026137911298676 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.6293381929397583 | loss/mean: 0.6293381929397583 | loss/std: 0.04778912744950747 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.634178912639618 | loss/mean: 0.634178912639618 | loss/std: 0.06384283250323483 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.6484604001045227 | loss/mean:

train (1/15) loss: 1.0845162153244017 | loss/mean: 1.0845162153244017 | loss/std: 0.02923848325268716 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 0.9546544194221497 | loss/mean: 0.9546544194221497 | loss/std: 0.066134962033581 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.8511602580547333 | loss/mean: 0.8511602580547333 | loss/std: 0.07085547856139797 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.7052608251571655 | loss/mean: 0.7052608251571655 | loss/std: 0.06930059170711769 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.7180993199348449 | loss/mean: 0.7180993199348449 | loss/std: 0.11735784217627422 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.6824437737464905 | loss/mean: 0.6824437737464905 | loss/std: 0.09325933178644491 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.6609440207481384 | loss/mean: 0.6609440207481384 | loss/std: 0.0688352664150658 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.6158190727233886 | loss/mean: 0

train (1/15) loss: 1.0845162153244017 | loss/mean: 1.0845162153244017 | loss/std: 0.02923848325268716 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 0.9546544194221497 | loss/mean: 0.9546544194221497 | loss/std: 0.066134962033581 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.8511602580547333 | loss/mean: 0.8511602580547333 | loss/std: 0.07085547856139797 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.7052608251571655 | loss/mean: 0.7052608251571655 | loss/std: 0.06930059170711769 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.7180993199348449 | loss/mean: 0.7180993199348449 | loss/std: 0.11735784217627422 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.6824437737464905 | loss/mean: 0.6824437737464905 | loss/std: 0.09325933178644491 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.6609440207481384 | loss/mean: 0.6609440207481384 | loss/std: 0.0688352664150658 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.6158190727233886 | loss/mean: 0

train (1/15) loss: 0.9763363420963288 | loss/mean: 0.9763363420963288 | loss/std: 0.08038916074922918 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 0.8220163583755493 | loss/mean: 0.8220163583755493 | loss/std: 0.10676201489967238 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.7982220947742463 | loss/mean: 0.7982220947742463 | loss/std: 0.039923245478468025 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.7230567693710327 | loss/mean: 0.7230567693710327 | loss/std: 0.05127514552767957 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.7117858290672302 | loss/mean: 0.7117858290672302 | loss/std: 0.03908070907665385 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.6662146210670471 | loss/mean: 0.6662146210670471 | loss/std: 0.04182495827438046 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.6692903935909271 | loss/mean: 0.6692903935909271 | loss/std: 0.04437605680960011 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.6342666983604431 | loss/mea

train (1/15) loss: 1.0877926707267762 | loss/mean: 1.0877926707267762 | loss/std: 0.024555890413891737 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 1.0164111256599426 | loss/mean: 1.0164111256599426 | loss/std: 0.02951138080564964 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.9034785091876985 | loss/mean: 0.9034785091876985 | loss/std: 0.08786633220289733 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.7734748125076294 | loss/mean: 0.7734748125076294 | loss/std: 0.12644939270125102 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.7379233300685882 | loss/mean: 0.7379233300685882 | loss/std: 0.04987413992422748 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.6676155209541321 | loss/mean: 0.6676155209541321 | loss/std: 0.07596400975493972 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.639194667339325 | loss/mean: 0.639194667339325 | loss/std: 0.03829912937108609 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.633695375919342 | loss/mean: 

train (1/15) loss: 0.9763363420963288 | loss/mean: 0.9763363420963288 | loss/std: 0.08038916074922918 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 0.8220163583755493 | loss/mean: 0.8220163583755493 | loss/std: 0.10676201489967238 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.7982220947742463 | loss/mean: 0.7982220947742463 | loss/std: 0.039923245478468025 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.7230567693710327 | loss/mean: 0.7230567693710327 | loss/std: 0.05127514552767957 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.7117858290672302 | loss/mean: 0.7117858290672302 | loss/std: 0.03908070907665385 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.6662146210670471 | loss/mean: 0.6662146210670471 | loss/std: 0.04182495827438046 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.6692903935909271 | loss/mean: 0.6692903935909271 | loss/std: 0.04437605680960011 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.6342666983604431 | loss/mea

train (1/15) loss: 1.070479154586792 | loss/mean: 1.070479154586792 | loss/std: 0.027372299106081535 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 0.9679574489593505 | loss/mean: 0.9679574489593505 | loss/std: 0.023913379551874205 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.8364065945148468 | loss/mean: 0.8364065945148468 | loss/std: 0.07884109961220767 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.6798811912536621 | loss/mean: 0.6798811912536621 | loss/std: 0.06475732061960378 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.6468950390815734 | loss/mean: 0.6468950390815734 | loss/std: 0.035046452952595446 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.6169135451316834 | loss/mean: 0.6169135451316834 | loss/std: 0.06207588896855 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.6046760082244873 | loss/mean: 0.6046760082244873 | loss/std: 0.048656941918179736 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.5851769924163819 | loss/mean:

train (1/15) loss: 0.9683294653892517 | loss/mean: 0.9683294653892517 | loss/std: 0.1112008458311627 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 0.7360962510108948 | loss/mean: 0.7360962510108948 | loss/std: 0.09512945340309759 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.680477488040924 | loss/mean: 0.680477488040924 | loss/std: 0.05605702505689547 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.7296147346496582 | loss/mean: 0.7296147346496582 | loss/std: 0.1177902997309185 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.6147432029247284 | loss/mean: 0.6147432029247284 | loss/std: 0.050977125720902766 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.6000249743461609 | loss/mean: 0.6000249743461609 | loss/std: 0.07230980115080798 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.610648089647293 | loss/mean: 0.610648089647293 | loss/std: 0.08652787950738969 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.6591464877128601 | loss/mean: 0.6

train (1/15) loss: 0.9683294653892517 | loss/mean: 0.9683294653892517 | loss/std: 0.1112008458311627 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 0.7360962510108948 | loss/mean: 0.7360962510108948 | loss/std: 0.09512945340309759 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.680477488040924 | loss/mean: 0.680477488040924 | loss/std: 0.05605702505689547 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.7296147346496582 | loss/mean: 0.7296147346496582 | loss/std: 0.1177902997309185 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.6147432029247284 | loss/mean: 0.6147432029247284 | loss/std: 0.050977125720902766 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.6000249743461609 | loss/mean: 0.6000249743461609 | loss/std: 0.07230980115080798 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.610648089647293 | loss/mean: 0.610648089647293 | loss/std: 0.08652787950738969 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.6591464877128601 | loss/mean: 0.6

train (1/15) loss: 0.9405204117298126 | loss/mean: 0.9405204117298126 | loss/std: 0.1100386323705014 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 0.7597640872001648 | loss/mean: 0.7597640872001648 | loss/std: 0.09726226945075331 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.6835220098495484 | loss/mean: 0.6835220098495484 | loss/std: 0.0615221140261628 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.6500792145729065 | loss/mean: 0.6500792145729065 | loss/std: 0.09271079202912877 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.6395674347877502 | loss/mean: 0.6395674347877502 | loss/std: 0.0899096970690933 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.5899372100830078 | loss/mean: 0.5899372100830078 | loss/std: 0.06525328751580894 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.5851580560207366 | loss/mean: 0.5851580560207366 | loss/std: 0.03990831062261047 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.5756986975669861 | loss/mean: 0

train (1/15) loss: 1.0566652178764344 | loss/mean: 1.0566652178764344 | loss/std: 0.044969455421949626 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 0.8767531156539917 | loss/mean: 0.8767531156539917 | loss/std: 0.06854491630518585 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.798592084646225 | loss/mean: 0.798592084646225 | loss/std: 0.05863585708088909 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.6635496258735657 | loss/mean: 0.6635496258735657 | loss/std: 0.06913860462931336 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.62728271484375 | loss/mean: 0.62728271484375 | loss/std: 0.04605619068307033 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.5936290502548218 | loss/mean: 0.5936290502548218 | loss/std: 0.04011744976817629 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.6233319878578185 | loss/mean: 0.6233319878578185 | loss/std: 0.08162568352017437 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.6585376381874084 | loss/mean: 0.6

train (1/15) loss: 1.0566652178764344 | loss/mean: 1.0566652178764344 | loss/std: 0.044969455421949626 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 0.8767531156539917 | loss/mean: 0.8767531156539917 | loss/std: 0.06854491630518585 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.798592084646225 | loss/mean: 0.798592084646225 | loss/std: 0.05863585708088909 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.6635496258735657 | loss/mean: 0.6635496258735657 | loss/std: 0.06913860462931336 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.62728271484375 | loss/mean: 0.62728271484375 | loss/std: 0.04605619068307033 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.5936290502548218 | loss/mean: 0.5936290502548218 | loss/std: 0.04011744976817629 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.6233319878578185 | loss/mean: 0.6233319878578185 | loss/std: 0.08162568352017437 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.6585376381874084 | loss/mean: 0.6

* Epoch (15/15) 
Top best models:
logs/trial_300/train.7.pth	0.5790
train (1/15) loss: 1.0669759452342986 | loss/mean: 1.0669759452342986 | loss/std: 0.047055166563676545 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 0.8603938937187194 | loss/mean: 0.8603938937187194 | loss/std: 0.10678478250644026 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.8772588312625885 | loss/mean: 0.8772588312625885 | loss/std: 0.11429859734092516 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.7864745259284973 | loss/mean: 0.7864745259284973 | loss/std: 0.08043552497367769 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.7754641890525819 | loss/mean: 0.7754641890525819 | loss/std: 0.06585122144591382 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.6622161269187927 | loss/mean: 0.6622161269187927 | loss/std: 0.06883017548176641 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.7471619963645936 | loss/mean: 0.7471619963645936 | loss/std: 0.1265179048710328 | lr: 0.

valid (15/15) loss: 0.8514445185661316 | loss/mean: 0.8514445185661316 | loss/std: 0.20203036440724503 | lr: 0.02 | momentum: 0.9
* Epoch (15/15) 
Top best models:
logs/trial_302/train.7.pth	0.5790
train (1/15) loss: 1.0669759452342986 | loss/mean: 1.0669759452342986 | loss/std: 0.047055166563676545 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 0.8603938937187194 | loss/mean: 0.8603938937187194 | loss/std: 0.10678478250644026 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.8772588312625885 | loss/mean: 0.8772588312625885 | loss/std: 0.11429859734092516 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.7864745259284973 | loss/mean: 0.7864745259284973 | loss/std: 0.08043552497367769 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.7754641890525819 | loss/mean: 0.7754641890525819 | loss/std: 0.06585122144591382 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.6622161269187927 | loss/mean: 0.6622161269187927 | loss/std: 0.06883017548176641 | lr: 0.02 | momentum:

train (15/15) loss: 0.9016146779060363 | loss/mean: 0.9016146779060363 | loss/std: 0.10290218801143018 | lr: 0.02 | momentum: 0.9
valid (15/15) loss: 0.8514445185661316 | loss/mean: 0.8514445185661316 | loss/std: 0.20203036440724503 | lr: 0.02 | momentum: 0.9
* Epoch (15/15) 
Top best models:
logs/trial_304/train.7.pth	0.5790


  layer_configurations = np.array(layer_configurations)
    There is an imbalance between your GPUs. You may want to exclude GPU 1 which
    has less than 75% of the memory or cores of GPU 0. You can do so by setting
    the device_ids argument to DataParallel, or by setting the CUDA_VISIBLE_DEVICES
    environment variable.
  x = F.softmax(x)


train (1/15) loss: 1.1011430621147154 | loss/mean: 1.1011430621147154 | loss/std: 0.031036565256725036 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 1.0617070198059082 | loss/mean: 1.0617070198059082 | loss/std: 0.019987888085999075 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.978238570690155 | loss/mean: 0.978238570690155 | loss/std: 0.07164238623047349 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.8667152285575866 | loss/mean: 0.8667152285575866 | loss/std: 0.09410837889066719 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.870817345380783 | loss/mean: 0.870817345380783 | loss/std: 0.06022854448628603 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.8083127737045288 | loss/mean: 0.8083127737045288 | loss/std: 0.07556640098349804 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.8366216540336608 | loss/mean: 0.8366216540336608 | loss/std: 0.06241059809023939 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.8021223902702331 | loss/mean: 

train (1/15) loss: 1.008935296535492 | loss/mean: 1.008935296535492 | loss/std: 0.09285724086196197 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 0.795720887184143 | loss/mean: 0.795720887184143 | loss/std: 0.08769151811475366 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.8331292033195495 | loss/mean: 0.8331292033195495 | loss/std: 0.06272755505899272 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.777813720703125 | loss/mean: 0.777813720703125 | loss/std: 0.11665238956192307 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.8073484003543854 | loss/mean: 0.8073484003543854 | loss/std: 0.05729537457590455 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.7469719409942627 | loss/mean: 0.7469719409942627 | loss/std: 0.07920457565565465 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.7320720374584199 | loss/mean: 0.7320720374584199 | loss/std: 0.03884741138031153 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.7039286613464355 | loss/mean: 0.70

train (1/15) loss: 1.0721407890319823 | loss/mean: 1.0721407890319823 | loss/std: 0.041071359627274144 | lr: 0.02 | momentum: 0.9
valid (1/15) loss: 0.8958200097084046 | loss/mean: 0.8958200097084046 | loss/std: 0.10316300315299337 | lr: 0.02 | momentum: 0.9
* Epoch (1/15) 
train (2/15) loss: 0.8800778687000275 | loss/mean: 0.8800778687000275 | loss/std: 0.07168536763741072 | lr: 0.02 | momentum: 0.9
valid (2/15) loss: 0.7991868495941162 | loss/mean: 0.7991868495941162 | loss/std: 0.11914281962715474 | lr: 0.02 | momentum: 0.9
* Epoch (2/15) 
train (3/15) loss: 0.8252937018871307 | loss/mean: 0.8252937018871307 | loss/std: 0.054673779411353866 | lr: 0.02 | momentum: 0.9
valid (3/15) loss: 0.777630877494812 | loss/mean: 0.777630877494812 | loss/std: 0.09805756038640213 | lr: 0.02 | momentum: 0.9
* Epoch (3/15) 
train (4/15) loss: 0.8027435421943664 | loss/mean: 0.8027435421943664 | loss/std: 0.055921830766869504 | lr: 0.02 | momentum: 0.9
valid (4/15) loss: 0.7610607028007508 | loss/mea