In [1]:
import json
import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from catalyst import dl, utils
from catalyst.contrib.datasets import MNIST
from sklearn import datasets
from sklearn.metrics import log_loss
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from torch import nn as nn
from torch import optim
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset
from catalyst.loggers.mlflow import MLflowLogger

In [2]:
torch.manual_seed(0)


<torch._C.Generator at 0x7fc490279610>

In [3]:
iris = datasets.load_iris()
iris_input_dataset = iris['data']
target = torch.from_numpy(iris['target'])


In [4]:
scaler = MinMaxScaler()
scaler.fit(iris_input_dataset)
iris_input_dataset = torch.from_numpy(scaler.transform(iris_input_dataset)).float()

In [5]:
X_train, X_test, y_train, y_test = train_test_split(iris_input_dataset, target, test_size=0.33, random_state=42)

In [6]:
training_dataset = TensorDataset(X_train, y_train)
validation_dataset = TensorDataset(X_test, y_test)
train_loader = DataLoader(training_dataset, batch_size=10, num_workers=1)
valid_loader = DataLoader(validation_dataset, batch_size=10, num_workers=1)
loaders = {"train": train_loader, "valid": valid_loader}

In [7]:
criterion = nn.CrossEntropyLoss()


In [8]:
runner = dl.SupervisedRunner(
    input_key="features", output_key="logits", target_key="targets", loss_key="loss"
)


In [9]:
def get_random_configurations(number_of_configurations, rng):
    layer_configurations = []
    for _ in range(number_of_configurations):
        layer_configuration = []
        number_of_hidden_layers = rng.randint(low=1, high=6)
        for _ in range(number_of_hidden_layers):
            layer_configuration.append(rng.randint(low=2, high=100))
        layer_configurations.append(layer_configuration)
    return layer_configurations
rng = np.random.RandomState(1234)
number_of_configurations = 20
layer_configurations = get_random_configurations(number_of_configurations, rng)


In [10]:
layer_configurations

[[40, 55, 78, 26],
 [17, 51],
 [28, 32],
 [32, 28, 60, 94],
 [75],
 [78, 39, 36],
 [69, 13, 2],
 [77, 82],
 [4, 21, 14, 67],
 [83, 16, 73, 62],
 [48, 30, 83],
 [98, 14, 71, 97, 33],
 [91, 86],
 [43],
 [58],
 [72, 58, 88, 46, 92],
 [49, 51, 20, 87],
 [39, 40, 9],
 [69, 7, 49, 49, 17],
 [12, 30, 6]]

In [11]:
class MLP(nn.Module):
    def __init__(self, input_layer_size, output_layer_size, layer_configuration, activation_type='relu'):
        super(MLP, self).__init__()
        self.fully_connected_layers = nn.ModuleDict()
        self.activation_type = activation_type
        hidden_layer_number = 0
        for hidden_layer_idx in range(len(layer_configuration)):
            if hidden_layer_idx == 0:
                self.fully_connected_layers[str(hidden_layer_number)] = nn.Linear(
                    input_layer_size, layer_configuration[hidden_layer_idx]
                )
                hidden_layer_number += 1
            if hidden_layer_idx == len(layer_configuration) - 1:
                self.fully_connected_layers[str(hidden_layer_number)] = nn.Linear(
                    layer_configuration[hidden_layer_idx],
                    output_layer_size
                )
            else:
                self.fully_connected_layers[str(hidden_layer_number)] = nn.Linear(
                    layer_configuration[hidden_layer_idx],
                    layer_configuration[hidden_layer_idx+1]
                )
                hidden_layer_number += 1
    def forward(self, x):
        for fc_key in self.fully_connected_layers:
            x = self.fully_connected_layers[fc_key](x)
            if fc_key != str(len(self.fully_connected_layers) -1):
                x = F.relu(x)
            else:
                x = F.softmax(x)
        return x

In [19]:
def train_and_evaluate_mlp(trial_number, layer_configuration, epochs):
    model = MLP(
        input_layer_size=iris_input_dataset.shape[1],
        layer_configuration=layer_configuration,
        output_layer_size=len(np.unique(target))
    )
    optimizer = optim.Adam(model.parameters(), lr=0.02)
    checkpoint_logdir = "experiments"
    loggers = {"mlflow": MLflowLogger(experiment="test_exp", run="test_run")}
    runner.train(
        model=model,
        hparams={},
        criterion=criterion,
        optimizer=optimizer,
        loaders=loaders,
        num_epochs=epochs,
        callbacks=[
            dl.CheckpointCallback(
                logdir=checkpoint_logdir,
                loader_key="valid",
                metric_key="loss",
                mode="model",
            )
        ],
        logdir="./logs",
        valid_loader="valid",
        valid_metric="loss",
        minimize_valid_metric=True,
        verbose=True,
        loggers=loggers,
    )
    

In [20]:
for idx, layer_config in enumerate(layer_configurations):
     train_and_evaluate_mlp(
         idx, layer_config, epochs=10,
     )

1/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (1/10) loss: 0.9763587355613709 | loss/mean: 0.9763587355613709 | loss/std: 0.10698749295865581 | lr: 0.02 | momentum: 0.9


  x = F.softmax(x)


1/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (1/10) loss: 0.8250154256820679 | loss/mean: 0.8250154256820679 | loss/std: 0.16391473347218385 | lr: 0.02 | momentum: 0.9
* Epoch (1/10) 


2/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (2/10) loss: 0.8309035420417785 | loss/mean: 0.8309035420417785 | loss/std: 0.052162993541782096 | lr: 0.02 | momentum: 0.9


2/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (2/10) loss: 0.7209554195404053 | loss/mean: 0.7209554195404053 | loss/std: 0.1017836446791273 | lr: 0.02 | momentum: 0.9
* Epoch (2/10) 


3/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (3/10) loss: 0.6848497748374939 | loss/mean: 0.6848497748374939 | loss/std: 0.06544799680030126 | lr: 0.02 | momentum: 0.9


3/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (3/10) loss: 0.6308364272117615 | loss/mean: 0.6308364272117615 | loss/std: 0.09735562563329393 | lr: 0.02 | momentum: 0.9
* Epoch (3/10) 


4/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (4/10) loss: 0.6097493290901184 | loss/mean: 0.6097493290901184 | loss/std: 0.04802303781303631 | lr: 0.02 | momentum: 0.9


4/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (4/10) loss: 0.5809637427330017 | loss/mean: 0.5809637427330017 | loss/std: 0.032753960076642406 | lr: 0.02 | momentum: 0.9
* Epoch (4/10) 


5/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (5/10) loss: 0.5977155685424805 | loss/mean: 0.5977155685424805 | loss/std: 0.05929471200170352 | lr: 0.02 | momentum: 0.9


5/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (5/10) loss: 0.5792567729949951 | loss/mean: 0.5792567729949951 | loss/std: 0.05491537185595135 | lr: 0.02 | momentum: 0.9
* Epoch (5/10) 


6/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (6/10) loss: 0.6045595943927765 | loss/mean: 0.6045595943927765 | loss/std: 0.0715774841119495 | lr: 0.02 | momentum: 0.9


6/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (6/10) loss: 0.6194616436958313 | loss/mean: 0.6194616436958313 | loss/std: 0.10882204798276765 | lr: 0.02 | momentum: 0.9
* Epoch (6/10) 


7/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (7/10) loss: 0.6622497320175171 | loss/mean: 0.6622497320175171 | loss/std: 0.10094008398271737 | lr: 0.02 | momentum: 0.9


7/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (7/10) loss: 0.5881604313850403 | loss/mean: 0.5881604313850403 | loss/std: 0.0705725045777221 | lr: 0.02 | momentum: 0.9
* Epoch (7/10) 


8/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (8/10) loss: 0.6465096473693848 | loss/mean: 0.6465096473693848 | loss/std: 0.08383822521607946 | lr: 0.02 | momentum: 0.9


8/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (8/10) loss: 0.742832624912262 | loss/mean: 0.742832624912262 | loss/std: 0.12064413076579106 | lr: 0.02 | momentum: 0.9
* Epoch (8/10) 


9/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (9/10) loss: 0.833983963727951 | loss/mean: 0.833983963727951 | loss/std: 0.11220593547083615 | lr: 0.02 | momentum: 0.9


9/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (9/10) loss: 0.8477526664733886 | loss/mean: 0.8477526664733886 | loss/std: 0.20029038023759912 | lr: 0.02 | momentum: 0.9
* Epoch (9/10) 


10/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (10/10) loss: 0.7579654633998871 | loss/mean: 0.7579654633998871 | loss/std: 0.10104050953189324 | lr: 0.02 | momentum: 0.9


10/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (10/10) loss: 0.5768581032752991 | loss/mean: 0.5768581032752991 | loss/std: 0.03883448536461268 | lr: 0.02 | momentum: 0.9
* Epoch (10/10) 
Top models:
experiments/model.0010.pth	10.0000


1/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (1/10) loss: 1.023148202896118 | loss/mean: 1.023148202896118 | loss/std: 0.06618131752602993 | lr: 0.02 | momentum: 0.9


1/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (1/10) loss: 0.8612443208694458 | loss/mean: 0.8612443208694458 | loss/std: 0.09836935240023859 | lr: 0.02 | momentum: 0.9
* Epoch (1/10) 


2/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (2/10) loss: 0.8191352605819703 | loss/mean: 0.8191352605819703 | loss/std: 0.055280853581671424 | lr: 0.02 | momentum: 0.9


2/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (2/10) loss: 0.7266236901283264 | loss/mean: 0.7266236901283264 | loss/std: 0.08132020652370874 | lr: 0.02 | momentum: 0.9
* Epoch (2/10) 


3/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (3/10) loss: 0.6960615575313569 | loss/mean: 0.6960615575313569 | loss/std: 0.04016607148480381 | lr: 0.02 | momentum: 0.9


3/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (3/10) loss: 0.6338885426521301 | loss/mean: 0.6338885426521301 | loss/std: 0.054965639549353615 | lr: 0.02 | momentum: 0.9
* Epoch (3/10) 


4/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (4/10) loss: 0.6212317526340484 | loss/mean: 0.6212317526340484 | loss/std: 0.035694471591532724 | lr: 0.02 | momentum: 0.9


4/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (4/10) loss: 0.6113190174102783 | loss/mean: 0.6113190174102783 | loss/std: 0.06498100673483749 | lr: 0.02 | momentum: 0.9
* Epoch (4/10) 


5/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (5/10) loss: 0.6049151539802551 | loss/mean: 0.6049151539802551 | loss/std: 0.0429654991260232 | lr: 0.02 | momentum: 0.9


5/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (5/10) loss: 0.612663185596466 | loss/mean: 0.612663185596466 | loss/std: 0.07509904054264936 | lr: 0.02 | momentum: 0.9
* Epoch (5/10) 


6/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (6/10) loss: 0.6255345821380615 | loss/mean: 0.6255345821380615 | loss/std: 0.06267037559139985 | lr: 0.02 | momentum: 0.9


6/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (6/10) loss: 0.6341090679168702 | loss/mean: 0.6341090679168702 | loss/std: 0.08783070643409521 | lr: 0.02 | momentum: 0.9
* Epoch (6/10) 


7/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (7/10) loss: 0.6523970305919646 | loss/mean: 0.6523970305919646 | loss/std: 0.06705678775369037 | lr: 0.02 | momentum: 0.9


7/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (7/10) loss: 0.6019370317459106 | loss/mean: 0.6019370317459106 | loss/std: 0.043033990488841795 | lr: 0.02 | momentum: 0.9
* Epoch (7/10) 


8/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (8/10) loss: 0.6002037048339844 | loss/mean: 0.6002037048339844 | loss/std: 0.04473723163948611 | lr: 0.02 | momentum: 0.9


8/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (8/10) loss: 0.5850607395172119 | loss/mean: 0.5850607395172119 | loss/std: 0.03373068103328931 | lr: 0.02 | momentum: 0.9
* Epoch (8/10) 


9/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (9/10) loss: 0.5995481789112092 | loss/mean: 0.5995481789112092 | loss/std: 0.053460196453178156 | lr: 0.02 | momentum: 0.9


9/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (9/10) loss: 0.5854417443275451 | loss/mean: 0.5854417443275451 | loss/std: 0.03361002599030699 | lr: 0.02 | momentum: 0.9
* Epoch (9/10) 


10/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (10/10) loss: 0.6018306851387024 | loss/mean: 0.6018306851387024 | loss/std: 0.05862010845514806 | lr: 0.02 | momentum: 0.9


10/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (10/10) loss: 0.5876497507095337 | loss/mean: 0.5876497507095337 | loss/std: 0.03387908444959286 | lr: 0.02 | momentum: 0.9
* Epoch (10/10) 
Top models:
experiments/model.0010.pth	10.0000


1/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (1/10) loss: 1.030384612083435 | loss/mean: 1.030384612083435 | loss/std: 0.05889908995871453 | lr: 0.02 | momentum: 0.9


1/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (1/10) loss: 0.889264190196991 | loss/mean: 0.889264190196991 | loss/std: 0.07594285963109668 | lr: 0.02 | momentum: 0.9
* Epoch (1/10) 


2/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (2/10) loss: 0.8400177657604218 | loss/mean: 0.8400177657604218 | loss/std: 0.052500551159279533 | lr: 0.02 | momentum: 0.9


2/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (2/10) loss: 0.7460446357727051 | loss/mean: 0.7460446357727051 | loss/std: 0.08366475475444467 | lr: 0.02 | momentum: 0.9
* Epoch (2/10) 


3/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (3/10) loss: 0.7301766097545623 | loss/mean: 0.7301766097545623 | loss/std: 0.0377262902326515 | lr: 0.02 | momentum: 0.9


3/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (3/10) loss: 0.6593926668167114 | loss/mean: 0.6593926668167114 | loss/std: 0.05148089197151414 | lr: 0.02 | momentum: 0.9
* Epoch (3/10) 


4/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (4/10) loss: 0.6395414531230927 | loss/mean: 0.6395414531230927 | loss/std: 0.031129134425629056 | lr: 0.02 | momentum: 0.9


4/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (4/10) loss: 0.625333595275879 | loss/mean: 0.625333595275879 | loss/std: 0.06244953207933074 | lr: 0.02 | momentum: 0.9
* Epoch (4/10) 


5/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (5/10) loss: 0.6152883589267731 | loss/mean: 0.6152883589267731 | loss/std: 0.0452990633632833 | lr: 0.02 | momentum: 0.9


5/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (5/10) loss: 0.6267513036727905 | loss/mean: 0.6267513036727905 | loss/std: 0.07499939920080582 | lr: 0.02 | momentum: 0.9
* Epoch (5/10) 


6/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (6/10) loss: 0.6247577369213104 | loss/mean: 0.6247577369213104 | loss/std: 0.05073953318575833 | lr: 0.02 | momentum: 0.9


6/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (6/10) loss: 0.6290136098861694 | loss/mean: 0.6290136098861694 | loss/std: 0.07837733149168322 | lr: 0.02 | momentum: 0.9
* Epoch (6/10) 


7/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (7/10) loss: 0.6636632025241852 | loss/mean: 0.6636632025241852 | loss/std: 0.0740274789740619 | lr: 0.02 | momentum: 0.9


7/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (7/10) loss: 0.618508493900299 | loss/mean: 0.618508493900299 | loss/std: 0.06324150470386247 | lr: 0.02 | momentum: 0.9
* Epoch (7/10) 


8/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (8/10) loss: 0.5954481780529023 | loss/mean: 0.5954481780529023 | loss/std: 0.04243447975954259 | lr: 0.02 | momentum: 0.9


8/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (8/10) loss: 0.5832824349403382 | loss/mean: 0.5832824349403382 | loss/std: 0.041847142169141406 | lr: 0.02 | momentum: 0.9
* Epoch (8/10) 


9/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (9/10) loss: 0.6025312185287476 | loss/mean: 0.6025312185287476 | loss/std: 0.05560368050402522 | lr: 0.02 | momentum: 0.9


9/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (9/10) loss: 0.5835438966751099 | loss/mean: 0.5835438966751099 | loss/std: 0.03130113548181574 | lr: 0.02 | momentum: 0.9
* Epoch (9/10) 


10/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (10/10) loss: 0.6097554326057435 | loss/mean: 0.6097554326057435 | loss/std: 0.06423236040724935 | lr: 0.02 | momentum: 0.9


10/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (10/10) loss: 0.5867594361305237 | loss/mean: 0.5867594361305237 | loss/std: 0.0324077627852769 | lr: 0.02 | momentum: 0.9
* Epoch (10/10) 
Top models:
experiments/model.0010.pth	10.0000


1/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (1/10) loss: 1.0080785036087037 | loss/mean: 1.0080785036087037 | loss/std: 0.08541860751075857 | lr: 0.02 | momentum: 0.9


1/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (1/10) loss: 0.7953631639480591 | loss/mean: 0.7953631639480591 | loss/std: 0.07878890729451458 | lr: 0.02 | momentum: 0.9
* Epoch (1/10) 


2/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (2/10) loss: 0.818928861618042 | loss/mean: 0.818928861618042 | loss/std: 0.05940525658403836 | lr: 0.02 | momentum: 0.9


2/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (2/10) loss: 0.7206320405006409 | loss/mean: 0.7206320405006409 | loss/std: 0.06816991781086404 | lr: 0.02 | momentum: 0.9
* Epoch (2/10) 


3/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (3/10) loss: 0.7136258780956268 | loss/mean: 0.7136258780956268 | loss/std: 0.07598604328289944 | lr: 0.02 | momentum: 0.9


3/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (3/10) loss: 0.737908935546875 | loss/mean: 0.737908935546875 | loss/std: 0.12279696938346715 | lr: 0.02 | momentum: 0.9
* Epoch (3/10) 


4/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (4/10) loss: 0.6147135257720947 | loss/mean: 0.6147135257720947 | loss/std: 0.05736337787150809 | lr: 0.02 | momentum: 0.9


4/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (4/10) loss: 0.5890608668327332 | loss/mean: 0.5890608668327332 | loss/std: 0.06706301116761898 | lr: 0.02 | momentum: 0.9
* Epoch (4/10) 


5/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (5/10) loss: 0.6178202152252197 | loss/mean: 0.6178202152252197 | loss/std: 0.06974262494715676 | lr: 0.02 | momentum: 0.9


5/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (5/10) loss: 0.576318621635437 | loss/mean: 0.576318621635437 | loss/std: 0.04698622808515894 | lr: 0.02 | momentum: 0.9
* Epoch (5/10) 


6/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (6/10) loss: 0.6060700893402099 | loss/mean: 0.6060700893402099 | loss/std: 0.06274153273810083 | lr: 0.02 | momentum: 0.9


6/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (6/10) loss: 0.576352846622467 | loss/mean: 0.576352846622467 | loss/std: 0.0494511550218018 | lr: 0.02 | momentum: 0.9
* Epoch (6/10) 


7/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (7/10) loss: 0.7374445855617523 | loss/mean: 0.7374445855617523 | loss/std: 0.1294084774262543 | lr: 0.02 | momentum: 0.9


7/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (7/10) loss: 0.5788604497909546 | loss/mean: 0.5788604497909546 | loss/std: 0.038373300763406046 | lr: 0.02 | momentum: 0.9
* Epoch (7/10) 


8/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (8/10) loss: 0.7007094979286193 | loss/mean: 0.7007094979286193 | loss/std: 0.13720586687168815 | lr: 0.02 | momentum: 0.9


8/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (8/10) loss: 0.7703786969184876 | loss/mean: 0.7703786969184876 | loss/std: 0.13306622471146004 | lr: 0.02 | momentum: 0.9
* Epoch (8/10) 


9/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (9/10) loss: 0.8053872525691986 | loss/mean: 0.8053872525691986 | loss/std: 0.24754393830716528 | lr: 0.02 | momentum: 0.9


9/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (9/10) loss: 0.595939576625824 | loss/mean: 0.595939576625824 | loss/std: 0.05444768524853363 | lr: 0.02 | momentum: 0.9
* Epoch (9/10) 


10/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (10/10) loss: 0.7851045429706573 | loss/mean: 0.7851045429706573 | loss/std: 0.12580634509586233 | lr: 0.02 | momentum: 0.9


10/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (10/10) loss: 0.8507855415344239 | loss/mean: 0.8507855415344239 | loss/std: 0.20167085492802184 | lr: 0.02 | momentum: 0.9
* Epoch (10/10) 
Top models:
experiments/model.0010.pth	10.0000


1/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (1/10) loss: 1.0023467898368836 | loss/mean: 1.0023467898368836 | loss/std: 0.07649850613967357 | lr: 0.02 | momentum: 0.9


1/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (1/10) loss: 0.8623501658439636 | loss/mean: 0.8623501658439636 | loss/std: 0.11431776314435774 | lr: 0.02 | momentum: 0.9
* Epoch (1/10) 


2/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (2/10) loss: 0.8327854633331299 | loss/mean: 0.8327854633331299 | loss/std: 0.04899256678324851 | lr: 0.02 | momentum: 0.9


2/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (2/10) loss: 0.7458891034126282 | loss/mean: 0.7458891034126282 | loss/std: 0.06486000217883725 | lr: 0.02 | momentum: 0.9
* Epoch (2/10) 


3/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (3/10) loss: 0.7356805801391602 | loss/mean: 0.7356805801391602 | loss/std: 0.03578166955761657 | lr: 0.02 | momentum: 0.9


3/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (3/10) loss: 0.6834115147590637 | loss/mean: 0.6834115147590637 | loss/std: 0.045832271603445454 | lr: 0.02 | momentum: 0.9
* Epoch (3/10) 


4/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (4/10) loss: 0.6760709345340729 | loss/mean: 0.6760709345340729 | loss/std: 0.03518247497814413 | lr: 0.02 | momentum: 0.9


4/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (4/10) loss: 0.6406271934509278 | loss/mean: 0.6406271934509278 | loss/std: 0.04140880211645237 | lr: 0.02 | momentum: 0.9
* Epoch (4/10) 


5/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (5/10) loss: 0.6477998435497284 | loss/mean: 0.6477998435497284 | loss/std: 0.04058230220363298 | lr: 0.02 | momentum: 0.9


5/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (5/10) loss: 0.6176398515701294 | loss/mean: 0.6176398515701294 | loss/std: 0.03590413012099117 | lr: 0.02 | momentum: 0.9
* Epoch (5/10) 


6/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (6/10) loss: 0.6391749382019043 | loss/mean: 0.6391749382019043 | loss/std: 0.04833057989022951 | lr: 0.02 | momentum: 0.9


6/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (6/10) loss: 0.610216212272644 | loss/mean: 0.610216212272644 | loss/std: 0.03604491000124478 | lr: 0.02 | momentum: 0.9
* Epoch (6/10) 


7/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (7/10) loss: 0.6224183380603789 | loss/mean: 0.6224183380603789 | loss/std: 0.04064154145552094 | lr: 0.02 | momentum: 0.9


7/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (7/10) loss: 0.6014699578285218 | loss/mean: 0.6014699578285218 | loss/std: 0.03206254131789701 | lr: 0.02 | momentum: 0.9
* Epoch (7/10) 


8/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (8/10) loss: 0.6142204105854034 | loss/mean: 0.6142204105854034 | loss/std: 0.03921218294442682 | lr: 0.02 | momentum: 0.9


8/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (8/10) loss: 0.5965311527252197 | loss/mean: 0.5965311527252197 | loss/std: 0.031345393719239924 | lr: 0.02 | momentum: 0.9
* Epoch (8/10) 


9/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (9/10) loss: 0.6139503777027131 | loss/mean: 0.6139503777027131 | loss/std: 0.044326401040470025 | lr: 0.02 | momentum: 0.9


9/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (9/10) loss: 0.5950181007385253 | loss/mean: 0.5950181007385253 | loss/std: 0.030687218319508405 | lr: 0.02 | momentum: 0.9
* Epoch (9/10) 


10/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (10/10) loss: 0.6073829233646393 | loss/mean: 0.6073829233646393 | loss/std: 0.04035701035852891 | lr: 0.02 | momentum: 0.9


10/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (10/10) loss: 0.5918319582939148 | loss/mean: 0.5918319582939148 | loss/std: 0.030397975427927635 | lr: 0.02 | momentum: 0.9
* Epoch (10/10) 
Top models:
experiments/model.0010.pth	10.0000


1/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (1/10) loss: 1.0525316834449767 | loss/mean: 1.0525316834449767 | loss/std: 0.06314712628389368 | lr: 0.02 | momentum: 0.9


1/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (1/10) loss: 0.9323266983032227 | loss/mean: 0.9323266983032227 | loss/std: 0.0897917853405879 | lr: 0.02 | momentum: 0.9
* Epoch (1/10) 


2/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (2/10) loss: 0.8405437886714935 | loss/mean: 0.8405437886714935 | loss/std: 0.07179157015925632 | lr: 0.02 | momentum: 0.9


2/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (2/10) loss: 0.6857564210891723 | loss/mean: 0.6857564210891723 | loss/std: 0.05574015271326607 | lr: 0.02 | momentum: 0.9
* Epoch (2/10) 


3/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (3/10) loss: 0.6526101648807525 | loss/mean: 0.6526101648807525 | loss/std: 0.038020492866858664 | lr: 0.02 | momentum: 0.9


3/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (3/10) loss: 0.612192976474762 | loss/mean: 0.612192976474762 | loss/std: 0.059825082594436384 | lr: 0.02 | momentum: 0.9
* Epoch (3/10) 


4/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (4/10) loss: 0.6122573792934418 | loss/mean: 0.6122573792934418 | loss/std: 0.046754344365883056 | lr: 0.02 | momentum: 0.9


4/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (4/10) loss: 0.5831914901733398 | loss/mean: 0.5831914901733398 | loss/std: 0.04948646103543325 | lr: 0.02 | momentum: 0.9
* Epoch (4/10) 


5/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (5/10) loss: 0.5867764890193939 | loss/mean: 0.5867764890193939 | loss/std: 0.044404610824837876 | lr: 0.02 | momentum: 0.9


5/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (5/10) loss: 0.5784675121307373 | loss/mean: 0.5784675121307373 | loss/std: 0.03960115856009696 | lr: 0.02 | momentum: 0.9
* Epoch (5/10) 


6/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (6/10) loss: 0.607336974143982 | loss/mean: 0.607336974143982 | loss/std: 0.07690192932889761 | lr: 0.02 | momentum: 0.9


6/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (6/10) loss: 0.6088173270225525 | loss/mean: 0.6088173270225525 | loss/std: 0.07986065865647471 | lr: 0.02 | momentum: 0.9
* Epoch (6/10) 


7/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (7/10) loss: 0.6201981365680694 | loss/mean: 0.6201981365680694 | loss/std: 0.07084973884502109 | lr: 0.02 | momentum: 0.9


7/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (7/10) loss: 0.6548127651214599 | loss/mean: 0.6548127651214599 | loss/std: 0.09714792607392517 | lr: 0.02 | momentum: 0.9
* Epoch (7/10) 


8/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (8/10) loss: 0.671356862783432 | loss/mean: 0.671356862783432 | loss/std: 0.08920936367903208 | lr: 0.02 | momentum: 0.9


8/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (8/10) loss: 0.5991439461708069 | loss/mean: 0.5991439461708069 | loss/std: 0.04640723815088218 | lr: 0.02 | momentum: 0.9
* Epoch (8/10) 


9/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (9/10) loss: 0.6806130945682526 | loss/mean: 0.6806130945682526 | loss/std: 0.09990837237403949 | lr: 0.02 | momentum: 0.9


9/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (9/10) loss: 0.6311645269393921 | loss/mean: 0.6311645269393921 | loss/std: 0.08406716995964679 | lr: 0.02 | momentum: 0.9
* Epoch (9/10) 


10/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (10/10) loss: 0.6176819562911987 | loss/mean: 0.6176819562911987 | loss/std: 0.05440783977916336 | lr: 0.02 | momentum: 0.9


10/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (10/10) loss: 0.5827178359031677 | loss/mean: 0.5827178359031677 | loss/std: 0.04177683515262233 | lr: 0.02 | momentum: 0.9
* Epoch (10/10) 
Top models:
experiments/model.0010.pth	10.0000


1/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (1/10) loss: 1.1029972672462465 | loss/mean: 1.1029972672462465 | loss/std: 0.030882823537679606 | lr: 0.02 | momentum: 0.9


1/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (1/10) loss: 1.1166245460510253 | loss/mean: 1.1166245460510253 | loss/std: 0.05210680025625237 | lr: 0.02 | momentum: 0.9
* Epoch (1/10) 


2/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (2/10) loss: 1.1001486778259277 | loss/mean: 1.1001486778259277 | loss/std: 0.025974360725808297 | lr: 0.02 | momentum: 0.9


2/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (2/10) loss: 1.1130475282669068 | loss/mean: 1.1130475282669068 | loss/std: 0.04357351555540951 | lr: 0.02 | momentum: 0.9
* Epoch (2/10) 


3/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (3/10) loss: 1.0988327264785767 | loss/mean: 1.0988327264785767 | loss/std: 0.02274453167142318 | lr: 0.02 | momentum: 0.9


3/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (3/10) loss: 1.1106632947921753 | loss/mean: 1.1106632947921753 | loss/std: 0.037172401344391835 | lr: 0.02 | momentum: 0.9
* Epoch (3/10) 


4/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (4/10) loss: 1.0981364488601684 | loss/mean: 1.0981364488601684 | loss/std: 0.02057658399331713 | lr: 0.02 | momentum: 0.9


4/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (4/10) loss: 1.1090929985046387 | loss/mean: 1.1090929985046387 | loss/std: 0.032608227517009786 | lr: 0.02 | momentum: 0.9
* Epoch (4/10) 


5/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (5/10) loss: 1.097789227962494 | loss/mean: 1.097789227962494 | loss/std: 0.019165847346960606 | lr: 0.02 | momentum: 0.9


5/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (5/10) loss: 1.1080559968948365 | loss/mean: 1.1080559968948365 | loss/std: 0.029458484094012638 | lr: 0.02 | momentum: 0.9
* Epoch (5/10) 


6/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (6/10) loss: 1.0976251482963562 | loss/mean: 1.0976251482963562 | loss/std: 0.018244147410784936 | lr: 0.02 | momentum: 0.9


6/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (6/10) loss: 1.1073584079742431 | loss/mean: 1.1073584079742431 | loss/std: 0.027307238426226906 | lr: 0.02 | momentum: 0.9
* Epoch (6/10) 


7/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (7/10) loss: 1.0975504040718078 | loss/mean: 1.0975504040718078 | loss/std: 0.017617901689430702 | lr: 0.02 | momentum: 0.9


7/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (7/10) loss: 1.1068762063980102 | loss/mean: 1.1068762063980102 | loss/std: 0.025825682571447156 | lr: 0.02 | momentum: 0.9
* Epoch (7/10) 


8/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (8/10) loss: 1.097516942024231 | loss/mean: 1.097516942024231 | loss/std: 0.017167153732126853 | lr: 0.02 | momentum: 0.9


8/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (8/10) loss: 1.106532382965088 | loss/mean: 1.106532382965088 | loss/std: 0.02478455928460296 | lr: 0.02 | momentum: 0.9
* Epoch (8/10) 


9/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (9/10) loss: 1.0975015282630922 | loss/mean: 1.0975015282630922 | loss/std: 0.016823516321279566 | lr: 0.02 | momentum: 0.9


9/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (9/10) loss: 1.1062801599502563 | loss/mean: 1.1062801599502563 | loss/std: 0.024034461315775812 | lr: 0.02 | momentum: 0.9
* Epoch (9/10) 


10/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (10/10) loss: 1.097493803501129 | loss/mean: 1.097493803501129 | loss/std: 0.016549599310891196 | lr: 0.02 | momentum: 0.9


10/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (10/10) loss: 1.1060896635055542 | loss/mean: 1.1060896635055542 | loss/std: 0.023479974393890213 | lr: 0.02 | momentum: 0.9
* Epoch (10/10) 
Top models:
experiments/model.0010.pth	10.0000


1/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (1/10) loss: 0.9295184850692749 | loss/mean: 0.9295184850692749 | loss/std: 0.11816499077979638 | lr: 0.02 | momentum: 0.9


1/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (1/10) loss: 0.7207405686378479 | loss/mean: 0.7207405686378479 | loss/std: 0.06373605385308208 | lr: 0.02 | momentum: 0.9
* Epoch (1/10) 


2/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (2/10) loss: 0.7089476227760315 | loss/mean: 0.7089476227760315 | loss/std: 0.05910339521529409 | lr: 0.02 | momentum: 0.9


2/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (2/10) loss: 0.6538740754127502 | loss/mean: 0.6538740754127502 | loss/std: 0.07357587447163616 | lr: 0.02 | momentum: 0.9
* Epoch (2/10) 


3/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (3/10) loss: 0.6506457149982453 | loss/mean: 0.6506457149982453 | loss/std: 0.04724080174061814 | lr: 0.02 | momentum: 0.9


3/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (3/10) loss: 0.6452373147010804 | loss/mean: 0.6452373147010804 | loss/std: 0.08077170518205681 | lr: 0.02 | momentum: 0.9
* Epoch (3/10) 


4/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (4/10) loss: 0.6553415358066559 | loss/mean: 0.6553415358066559 | loss/std: 0.06528218567169962 | lr: 0.02 | momentum: 0.9


4/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (4/10) loss: 0.5835132718086242 | loss/mean: 0.5835132718086242 | loss/std: 0.02969018320211091 | lr: 0.02 | momentum: 0.9
* Epoch (4/10) 


5/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (5/10) loss: 0.658927583694458 | loss/mean: 0.658927583694458 | loss/std: 0.08925094112221889 | lr: 0.02 | momentum: 0.9


5/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (5/10) loss: 0.5781643390655518 | loss/mean: 0.5781643390655518 | loss/std: 0.026995578725583457 | lr: 0.02 | momentum: 0.9
* Epoch (5/10) 


6/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (6/10) loss: 0.5916520535945892 | loss/mean: 0.5916520535945892 | loss/std: 0.05542719370796635 | lr: 0.02 | momentum: 0.9


6/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (6/10) loss: 0.5884732604026794 | loss/mean: 0.5884732604026794 | loss/std: 0.05555131765269237 | lr: 0.02 | momentum: 0.9
* Epoch (6/10) 


7/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (7/10) loss: 0.6165164768695831 | loss/mean: 0.6165164768695831 | loss/std: 0.06690575787409085 | lr: 0.02 | momentum: 0.9


7/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (7/10) loss: 0.5893872499465942 | loss/mean: 0.5893872499465942 | loss/std: 0.0356033560157542 | lr: 0.02 | momentum: 0.9
* Epoch (7/10) 


8/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (8/10) loss: 0.5954655408859253 | loss/mean: 0.5954655408859253 | loss/std: 0.0423661162545737 | lr: 0.02 | momentum: 0.9


8/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (8/10) loss: 0.5785757660865783 | loss/mean: 0.5785757660865783 | loss/std: 0.03422324969193398 | lr: 0.02 | momentum: 0.9
* Epoch (8/10) 


9/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (9/10) loss: 0.6126774966716767 | loss/mean: 0.6126774966716767 | loss/std: 0.07111737698708845 | lr: 0.02 | momentum: 0.9


9/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (9/10) loss: 0.5862150907516479 | loss/mean: 0.5862150907516479 | loss/std: 0.03563951503785974 | lr: 0.02 | momentum: 0.9
* Epoch (9/10) 


10/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (10/10) loss: 0.5786520719528199 | loss/mean: 0.5786520719528199 | loss/std: 0.04275780687441831 | lr: 0.02 | momentum: 0.9


10/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (10/10) loss: 0.5759395360946655 | loss/mean: 0.5759395360946655 | loss/std: 0.04216534677937313 | lr: 0.02 | momentum: 0.9
* Epoch (10/10) 
Top models:
experiments/model.0010.pth	10.0000


1/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (1/10) loss: 1.0865851998329163 | loss/mean: 1.0865851998329163 | loss/std: 0.02669242917748531 | lr: 0.02 | momentum: 0.9


1/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (1/10) loss: 0.9957550406455994 | loss/mean: 0.9957550406455994 | loss/std: 0.03012931323655291 | lr: 0.02 | momentum: 0.9
* Epoch (1/10) 


2/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (2/10) loss: 0.8773194134235383 | loss/mean: 0.8773194134235383 | loss/std: 0.0790910401556081 | lr: 0.02 | momentum: 0.9


2/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (2/10) loss: 0.7334404706954956 | loss/mean: 0.7334404706954956 | loss/std: 0.08508805069200134 | lr: 0.02 | momentum: 0.9
* Epoch (2/10) 


3/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (3/10) loss: 0.6993038773536682 | loss/mean: 0.6993038773536682 | loss/std: 0.039025352332644855 | lr: 0.02 | momentum: 0.9


3/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (3/10) loss: 0.7210292458534241 | loss/mean: 0.7210292458534241 | loss/std: 0.11468677047429582 | lr: 0.02 | momentum: 0.9
* Epoch (3/10) 


4/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (4/10) loss: 0.6287056386470795 | loss/mean: 0.6287056386470795 | loss/std: 0.035211847241743026 | lr: 0.02 | momentum: 0.9


4/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (4/10) loss: 0.62687908411026 | loss/mean: 0.62687908411026 | loss/std: 0.08721871425374866 | lr: 0.02 | momentum: 0.9
* Epoch (4/10) 


5/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (5/10) loss: 0.6369826316833497 | loss/mean: 0.6369826316833497 | loss/std: 0.09800166540094854 | lr: 0.02 | momentum: 0.9


5/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (5/10) loss: 0.769257640838623 | loss/mean: 0.769257640838623 | loss/std: 0.13475983953902076 | lr: 0.02 | momentum: 0.9
* Epoch (5/10) 


6/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (6/10) loss: 0.6519611597061158 | loss/mean: 0.6519611597061158 | loss/std: 0.07514423994841933 | lr: 0.02 | momentum: 0.9


6/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (6/10) loss: 0.7591234922409058 | loss/mean: 0.7591234922409058 | loss/std: 0.13014316400440495 | lr: 0.02 | momentum: 0.9
* Epoch (6/10) 


7/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (7/10) loss: 0.7009758830070494 | loss/mean: 0.7009758830070494 | loss/std: 0.12501142970338164 | lr: 0.02 | momentum: 0.9


7/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (7/10) loss: 0.6994033455848694 | loss/mean: 0.6994033455848694 | loss/std: 0.09429992695791868 | lr: 0.02 | momentum: 0.9
* Epoch (7/10) 


8/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (8/10) loss: 0.6517475843429565 | loss/mean: 0.6517475843429565 | loss/std: 0.09539866891202887 | lr: 0.02 | momentum: 0.9


8/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (8/10) loss: 0.6109590172767639 | loss/mean: 0.6109590172767639 | loss/std: 0.08249543386702406 | lr: 0.02 | momentum: 0.9
* Epoch (8/10) 


9/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (9/10) loss: 0.6110015749931336 | loss/mean: 0.6110015749931336 | loss/std: 0.07539349750466828 | lr: 0.02 | momentum: 0.9


9/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (9/10) loss: 0.608959698677063 | loss/mean: 0.608959698677063 | loss/std: 0.044434016277417936 | lr: 0.02 | momentum: 0.9
* Epoch (9/10) 


10/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (10/10) loss: 0.6099966466426849 | loss/mean: 0.6099966466426849 | loss/std: 0.056447309308512786 | lr: 0.02 | momentum: 0.9


10/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (10/10) loss: 0.5926025390625 | loss/mean: 0.5926025390625 | loss/std: 0.03774171159872699 | lr: 0.02 | momentum: 0.9
* Epoch (10/10) 
Top models:
experiments/model.0010.pth	10.0000


1/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (1/10) loss: 0.993522238731384 | loss/mean: 0.993522238731384 | loss/std: 0.0988950343392185 | lr: 0.02 | momentum: 0.9


1/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (1/10) loss: 0.8101032614707947 | loss/mean: 0.8101032614707947 | loss/std: 0.15688483624036073 | lr: 0.02 | momentum: 0.9
* Epoch (1/10) 


2/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (2/10) loss: 0.7894082188606263 | loss/mean: 0.7894082188606263 | loss/std: 0.09536109618425472 | lr: 0.02 | momentum: 0.9


2/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (2/10) loss: 0.6204255819320679 | loss/mean: 0.6204255819320679 | loss/std: 0.0644742268398185 | lr: 0.02 | momentum: 0.9
* Epoch (2/10) 


3/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (3/10) loss: 0.6306946277618408 | loss/mean: 0.6306946277618408 | loss/std: 0.14192770513857622 | lr: 0.02 | momentum: 0.9


3/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (3/10) loss: 0.670367443561554 | loss/mean: 0.670367443561554 | loss/std: 0.09262036587306376 | lr: 0.02 | momentum: 0.9
* Epoch (3/10) 


4/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (4/10) loss: 0.9241598784923553 | loss/mean: 0.9241598784923553 | loss/std: 0.18383546729379857 | lr: 0.02 | momentum: 0.9


4/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (4/10) loss: 0.8714446306228638 | loss/mean: 0.8714446306228638 | loss/std: 0.17379329578268435 | lr: 0.02 | momentum: 0.9
* Epoch (4/10) 


5/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (5/10) loss: 0.9402637779712677 | loss/mean: 0.9402637779712677 | loss/std: 0.16203113991679716 | lr: 0.02 | momentum: 0.9


5/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (5/10) loss: 0.8714446306228638 | loss/mean: 0.8714446306228638 | loss/std: 0.17379329578268435 | lr: 0.02 | momentum: 0.9
* Epoch (5/10) 


6/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (6/10) loss: 0.8914446353912354 | loss/mean: 0.8914446353912354 | loss/std: 0.1363300141072458 | lr: 0.02 | momentum: 0.9


6/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (6/10) loss: 0.8714446306228638 | loss/mean: 0.8714446306228638 | loss/std: 0.17379329578268435 | lr: 0.02 | momentum: 0.9
* Epoch (6/10) 


7/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (7/10) loss: 0.8914446353912354 | loss/mean: 0.8914446353912354 | loss/std: 0.1363300141072458 | lr: 0.02 | momentum: 0.9


7/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (7/10) loss: 0.8714446306228638 | loss/mean: 0.8714446306228638 | loss/std: 0.17379329578268435 | lr: 0.02 | momentum: 0.9
* Epoch (7/10) 


8/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (8/10) loss: 0.8914446413516999 | loss/mean: 0.8914446413516999 | loss/std: 0.13633000792449745 | lr: 0.02 | momentum: 0.9


8/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (8/10) loss: 0.8714446306228638 | loss/mean: 0.8714446306228638 | loss/std: 0.17379329578268435 | lr: 0.02 | momentum: 0.9
* Epoch (8/10) 


9/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (9/10) loss: 0.8914446592330932 | loss/mean: 0.8914446592330932 | loss/std: 0.13633001587376004 | lr: 0.02 | momentum: 0.9


9/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (9/10) loss: 0.8714446306228638 | loss/mean: 0.8714446306228638 | loss/std: 0.17379329578268435 | lr: 0.02 | momentum: 0.9
* Epoch (9/10) 


10/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (10/10) loss: 0.8914446651935578 | loss/mean: 0.8914446651935578 | loss/std: 0.13633001852351362 | lr: 0.02 | momentum: 0.9


10/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (10/10) loss: 0.8714446306228638 | loss/mean: 0.8714446306228638 | loss/std: 0.17379329578268435 | lr: 0.02 | momentum: 0.9
* Epoch (10/10) 
Top models:
experiments/model.0010.pth	10.0000


1/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (1/10) loss: 0.9553937315940858 | loss/mean: 0.9553937315940858 | loss/std: 0.09760076823614088 | lr: 0.02 | momentum: 0.9


1/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (1/10) loss: 0.7796916842460633 | loss/mean: 0.7796916842460633 | loss/std: 0.14193713886697062 | lr: 0.02 | momentum: 0.9
* Epoch (1/10) 


2/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (2/10) loss: 0.732365620136261 | loss/mean: 0.732365620136261 | loss/std: 0.07209236827890421 | lr: 0.02 | momentum: 0.9


2/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (2/10) loss: 0.6251379489898682 | loss/mean: 0.6251379489898682 | loss/std: 0.054022197415904846 | lr: 0.02 | momentum: 0.9
* Epoch (2/10) 


3/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (3/10) loss: 0.6443577826023102 | loss/mean: 0.6443577826023102 | loss/std: 0.08397720456300388 | lr: 0.02 | momentum: 0.9


3/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (3/10) loss: 0.6583304047584534 | loss/mean: 0.6583304047584534 | loss/std: 0.09681627583615374 | lr: 0.02 | momentum: 0.9
* Epoch (3/10) 


4/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (4/10) loss: 0.6204826354980468 | loss/mean: 0.6204826354980468 | loss/std: 0.08434629249636576 | lr: 0.02 | momentum: 0.9


4/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (4/10) loss: 0.5774261593818665 | loss/mean: 0.5774261593818665 | loss/std: 0.04094954352109144 | lr: 0.02 | momentum: 0.9
* Epoch (4/10) 


5/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (5/10) loss: 0.5807715117931366 | loss/mean: 0.5807715117931366 | loss/std: 0.043754491029493275 | lr: 0.02 | momentum: 0.9


5/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (5/10) loss: 0.5768926620483399 | loss/mean: 0.5768926620483399 | loss/std: 0.03904288006267863 | lr: 0.02 | momentum: 0.9
* Epoch (5/10) 


6/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (6/10) loss: 0.6020415186882019 | loss/mean: 0.6020415186882019 | loss/std: 0.073110166746207 | lr: 0.02 | momentum: 0.9


6/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (6/10) loss: 0.6089018225669861 | loss/mean: 0.6089018225669861 | loss/std: 0.08376878401275896 | lr: 0.02 | momentum: 0.9
* Epoch (6/10) 


7/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (7/10) loss: 0.6573756873607635 | loss/mean: 0.6573756873607635 | loss/std: 0.09547846176498638 | lr: 0.02 | momentum: 0.9


7/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (7/10) loss: 0.5762292742729187 | loss/mean: 0.5762292742729187 | loss/std: 0.031830793447807856 | lr: 0.02 | momentum: 0.9
* Epoch (7/10) 


8/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (8/10) loss: 0.6764450430870057 | loss/mean: 0.6764450430870057 | loss/std: 0.101566993811771 | lr: 0.02 | momentum: 0.9


8/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (8/10) loss: 0.6201241254806519 | loss/mean: 0.6201241254806519 | loss/std: 0.0772582049333254 | lr: 0.02 | momentum: 0.9
* Epoch (8/10) 


9/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (9/10) loss: 0.6454759716987609 | loss/mean: 0.6454759716987609 | loss/std: 0.08745203758181198 | lr: 0.02 | momentum: 0.9


9/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (9/10) loss: 0.615847110748291 | loss/mean: 0.615847110748291 | loss/std: 0.07522166046555981 | lr: 0.02 | momentum: 0.9
* Epoch (9/10) 


10/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (10/10) loss: 0.7035221815109253 | loss/mean: 0.7035221815109253 | loss/std: 0.08974659087379397 | lr: 0.02 | momentum: 0.9


10/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (10/10) loss: 0.6509636759757995 | loss/mean: 0.6509636759757995 | loss/std: 0.08153583651263803 | lr: 0.02 | momentum: 0.9
* Epoch (10/10) 
Top models:
experiments/model.0010.pth	10.0000


1/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (1/10) loss: 1.0044304788112641 | loss/mean: 1.0044304788112641 | loss/std: 0.10877173936713405 | lr: 0.02 | momentum: 0.9


1/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (1/10) loss: 0.771057665348053 | loss/mean: 0.771057665348053 | loss/std: 0.15362133783497992 | lr: 0.02 | momentum: 0.9
* Epoch (1/10) 


2/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (2/10) loss: 0.8821503460407257 | loss/mean: 0.8821503460407257 | loss/std: 0.1137196787329783 | lr: 0.02 | momentum: 0.9


2/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (2/10) loss: 0.8716522336006165 | loss/mean: 0.8716522336006165 | loss/std: 0.17260687090434046 | lr: 0.02 | momentum: 0.9
* Epoch (2/10) 


3/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (3/10) loss: 0.8918575167655944 | loss/mean: 0.8918575167655944 | loss/std: 0.13602882251182236 | lr: 0.02 | momentum: 0.9


3/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (3/10) loss: 0.8715033650398254 | loss/mean: 0.8715033650398254 | loss/std: 0.17374847888423195 | lr: 0.02 | momentum: 0.9
* Epoch (3/10) 


4/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (4/10) loss: 0.8915224313735963 | loss/mean: 0.8915224313735963 | loss/std: 0.13631426071079072 | lr: 0.02 | momentum: 0.9


4/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (4/10) loss: 0.8714603900909423 | loss/mean: 0.8714603900909423 | loss/std: 0.17378626619262938 | lr: 0.02 | momentum: 0.9
* Epoch (4/10) 


5/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (5/10) loss: 0.8914635241031647 | loss/mean: 0.8914635241031647 | loss/std: 0.13633110816675317 | lr: 0.02 | momentum: 0.9


5/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (5/10) loss: 0.8714447975158691 | loss/mean: 0.8714447975158691 | loss/std: 0.1737932201908571 | lr: 0.02 | momentum: 0.9
* Epoch (5/10) 


6/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (6/10) loss: 0.891444844007492 | loss/mean: 0.891444844007492 | loss/std: 0.13632990370108156 | lr: 0.02 | momentum: 0.9


6/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (6/10) loss: 0.8714446425437927 | loss/mean: 0.8714446425437927 | loss/std: 0.17379328738359182 | lr: 0.02 | momentum: 0.9
* Epoch (6/10) 


7/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (7/10) loss: 0.8914446592330932 | loss/mean: 0.8914446592330932 | loss/std: 0.1363299893762518 | lr: 0.02 | momentum: 0.9


7/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (7/10) loss: 0.8714446306228638 | loss/mean: 0.8714446306228638 | loss/std: 0.17379329578268435 | lr: 0.02 | momentum: 0.9
* Epoch (7/10) 


8/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (8/10) loss: 0.8914446473121643 | loss/mean: 0.8914446473121643 | loss/std: 0.13633000174174859 | lr: 0.02 | momentum: 0.9


8/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (8/10) loss: 0.8714446306228638 | loss/mean: 0.8714446306228638 | loss/std: 0.17379329578268435 | lr: 0.02 | momentum: 0.9
* Epoch (8/10) 


9/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (9/10) loss: 0.8914446413516999 | loss/mean: 0.8914446413516999 | loss/std: 0.13633000792449745 | lr: 0.02 | momentum: 0.9


9/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (9/10) loss: 0.8714446306228638 | loss/mean: 0.8714446306228638 | loss/std: 0.17379329578268435 | lr: 0.02 | momentum: 0.9
* Epoch (9/10) 


10/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (10/10) loss: 0.8914446413516999 | loss/mean: 0.8914446413516999 | loss/std: 0.13633000792449745 | lr: 0.02 | momentum: 0.9


10/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (10/10) loss: 0.8714446306228638 | loss/mean: 0.8714446306228638 | loss/std: 0.17379329578268435 | lr: 0.02 | momentum: 0.9
* Epoch (10/10) 
Top models:
experiments/model.0010.pth	10.0000


1/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (1/10) loss: 0.9628518640995025 | loss/mean: 0.9628518640995025 | loss/std: 0.10723004727073507 | lr: 0.02 | momentum: 0.9


1/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (1/10) loss: 0.7541336297988892 | loss/mean: 0.7541336297988892 | loss/std: 0.09873116125849805 | lr: 0.02 | momentum: 0.9
* Epoch (1/10) 


2/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (2/10) loss: 0.7294815659523011 | loss/mean: 0.7294815659523011 | loss/std: 0.06056715970615712 | lr: 0.02 | momentum: 0.9


2/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (2/10) loss: 0.6862273931503295 | loss/mean: 0.6862273931503295 | loss/std: 0.09502895841614464 | lr: 0.02 | momentum: 0.9
* Epoch (2/10) 


3/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (3/10) loss: 0.6540158092975616 | loss/mean: 0.6540158092975616 | loss/std: 0.055004085989021914 | lr: 0.02 | momentum: 0.9


3/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (3/10) loss: 0.6428385972976685 | loss/mean: 0.6428385972976685 | loss/std: 0.08024438854367688 | lr: 0.02 | momentum: 0.9
* Epoch (3/10) 


4/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (4/10) loss: 0.6169372379779816 | loss/mean: 0.6169372379779816 | loss/std: 0.05213639732653385 | lr: 0.02 | momentum: 0.9


4/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (4/10) loss: 0.6522641181945801 | loss/mean: 0.6522641181945801 | loss/std: 0.0854022378398717 | lr: 0.02 | momentum: 0.9
* Epoch (4/10) 


5/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (5/10) loss: 0.6491285264492035 | loss/mean: 0.6491285264492035 | loss/std: 0.06410895663507568 | lr: 0.02 | momentum: 0.9


5/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (5/10) loss: 0.5800342440605164 | loss/mean: 0.5800342440605164 | loss/std: 0.0294379507564773 | lr: 0.02 | momentum: 0.9
* Epoch (5/10) 


6/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (6/10) loss: 0.6759701609611511 | loss/mean: 0.6759701609611511 | loss/std: 0.09590905749202389 | lr: 0.02 | momentum: 0.9


6/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (6/10) loss: 0.6094068169593811 | loss/mean: 0.6094068169593811 | loss/std: 0.065700797185268 | lr: 0.02 | momentum: 0.9
* Epoch (6/10) 


7/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (7/10) loss: 0.6148939371109008 | loss/mean: 0.6148939371109008 | loss/std: 0.06355755966376743 | lr: 0.02 | momentum: 0.9


7/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (7/10) loss: 0.6062946677207947 | loss/mean: 0.6062946677207947 | loss/std: 0.06196569728866336 | lr: 0.02 | momentum: 0.9
* Epoch (7/10) 


8/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (8/10) loss: 0.6966108500957489 | loss/mean: 0.6966108500957489 | loss/std: 0.09254032441777513 | lr: 0.02 | momentum: 0.9


8/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (8/10) loss: 0.6369543194770813 | loss/mean: 0.6369543194770813 | loss/std: 0.07358675983684443 | lr: 0.02 | momentum: 0.9
* Epoch (8/10) 


9/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (9/10) loss: 0.636751651763916 | loss/mean: 0.636751651763916 | loss/std: 0.08279972642872241 | lr: 0.02 | momentum: 0.9


9/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (9/10) loss: 0.5896688103675842 | loss/mean: 0.5896688103675842 | loss/std: 0.04329789745940511 | lr: 0.02 | momentum: 0.9
* Epoch (9/10) 


10/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (10/10) loss: 0.761536979675293 | loss/mean: 0.761536979675293 | loss/std: 0.1438176405414165 | lr: 0.02 | momentum: 0.9


10/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (10/10) loss: 0.8410662293434144 | loss/mean: 0.8410662293434144 | loss/std: 0.1974593879450405 | lr: 0.02 | momentum: 0.9
* Epoch (10/10) 
Top models:
experiments/model.0010.pth	10.0000


1/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (1/10) loss: 1.0243802726268767 | loss/mean: 1.0243802726268767 | loss/std: 0.05337499653421735 | lr: 0.02 | momentum: 0.9


1/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (1/10) loss: 0.8920404434204101 | loss/mean: 0.8920404434204101 | loss/std: 0.06828996019274314 | lr: 0.02 | momentum: 0.9
* Epoch (1/10) 


2/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (2/10) loss: 0.8567329943180084 | loss/mean: 0.8567329943180084 | loss/std: 0.055924047199965576 | lr: 0.02 | momentum: 0.9


2/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (2/10) loss: 0.775898551940918 | loss/mean: 0.775898551940918 | loss/std: 0.09619204248336577 | lr: 0.02 | momentum: 0.9
* Epoch (2/10) 


3/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (3/10) loss: 0.7720997035503387 | loss/mean: 0.7720997035503387 | loss/std: 0.038721477262860564 | lr: 0.02 | momentum: 0.9


3/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (3/10) loss: 0.7205637454986572 | loss/mean: 0.7205637454986572 | loss/std: 0.05871418285989389 | lr: 0.02 | momentum: 0.9
* Epoch (3/10) 


4/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (4/10) loss: 0.7129487216472626 | loss/mean: 0.7129487216472626 | loss/std: 0.033913705152103395 | lr: 0.02 | momentum: 0.9


4/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (4/10) loss: 0.6711419224739075 | loss/mean: 0.6711419224739075 | loss/std: 0.040764837289232 | lr: 0.02 | momentum: 0.9
* Epoch (4/10) 


5/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (5/10) loss: 0.6704505383968353 | loss/mean: 0.6704505383968353 | loss/std: 0.03674235172391265 | lr: 0.02 | momentum: 0.9


5/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (5/10) loss: 0.638830554485321 | loss/mean: 0.638830554485321 | loss/std: 0.03596291070391464 | lr: 0.02 | momentum: 0.9
* Epoch (5/10) 


6/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (6/10) loss: 0.6466986238956451 | loss/mean: 0.6466986238956451 | loss/std: 0.037822430475311516 | lr: 0.02 | momentum: 0.9


6/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (6/10) loss: 0.6205038070678711 | loss/mean: 0.6205038070678711 | loss/std: 0.03276770069421587 | lr: 0.02 | momentum: 0.9
* Epoch (6/10) 


7/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (7/10) loss: 0.6325141727924347 | loss/mean: 0.6325141727924347 | loss/std: 0.03792020984618135 | lr: 0.02 | momentum: 0.9


7/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (7/10) loss: 0.6097129583358765 | loss/mean: 0.6097129583358765 | loss/std: 0.030720815464026306 | lr: 0.02 | momentum: 0.9
* Epoch (7/10) 


8/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (8/10) loss: 0.622522383928299 | loss/mean: 0.622522383928299 | loss/std: 0.03725671638901827 | lr: 0.02 | momentum: 0.9


8/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (8/10) loss: 0.6025189518928528 | loss/mean: 0.6025189518928528 | loss/std: 0.029049975862910575 | lr: 0.02 | momentum: 0.9
* Epoch (8/10) 


9/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (9/10) loss: 0.6161664843559265 | loss/mean: 0.6161664843559265 | loss/std: 0.03752382267459558 | lr: 0.02 | momentum: 0.9


9/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (9/10) loss: 0.5978699922561646 | loss/mean: 0.5978699922561646 | loss/std: 0.028102768984997712 | lr: 0.02 | momentum: 0.9
* Epoch (9/10) 


10/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (10/10) loss: 0.6117752909660339 | loss/mean: 0.6117752909660339 | loss/std: 0.037999920706788995 | lr: 0.02 | momentum: 0.9


10/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (10/10) loss: 0.5947501063346863 | loss/mean: 0.5947501063346863 | loss/std: 0.02751441739589158 | lr: 0.02 | momentum: 0.9
* Epoch (10/10) 
Top models:
experiments/model.0010.pth	10.0000


1/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (1/10) loss: 0.9843781411647796 | loss/mean: 0.9843781411647796 | loss/std: 0.06719123413166508 | lr: 0.02 | momentum: 0.9


1/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (1/10) loss: 0.8421510577201843 | loss/mean: 0.8421510577201843 | loss/std: 0.08891039332051809 | lr: 0.02 | momentum: 0.9
* Epoch (1/10) 


2/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (2/10) loss: 0.8271273910999298 | loss/mean: 0.8271273910999298 | loss/std: 0.04737580461207241 | lr: 0.02 | momentum: 0.9


2/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (2/10) loss: 0.7561420202255249 | loss/mean: 0.7561420202255249 | loss/std: 0.0702249409643871 | lr: 0.02 | momentum: 0.9
* Epoch (2/10) 


3/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (3/10) loss: 0.7518886744976043 | loss/mean: 0.7518886744976043 | loss/std: 0.04357934056412462 | lr: 0.02 | momentum: 0.9


3/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (3/10) loss: 0.7051062941551208 | loss/mean: 0.7051062941551208 | loss/std: 0.06064434342138397 | lr: 0.02 | momentum: 0.9
* Epoch (3/10) 


4/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (4/10) loss: 0.694012176990509 | loss/mean: 0.694012176990509 | loss/std: 0.04031449732906899 | lr: 0.02 | momentum: 0.9


4/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (4/10) loss: 0.6565022230148315 | loss/mean: 0.6565022230148315 | loss/std: 0.04554139553968903 | lr: 0.02 | momentum: 0.9
* Epoch (4/10) 


5/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (5/10) loss: 0.6599254786968232 | loss/mean: 0.6599254786968232 | loss/std: 0.046044579960459305 | lr: 0.02 | momentum: 0.9


5/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (5/10) loss: 0.6289795637130737 | loss/mean: 0.6289795637130737 | loss/std: 0.03831677143809258 | lr: 0.02 | momentum: 0.9
* Epoch (5/10) 


6/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (6/10) loss: 0.6453774809837342 | loss/mean: 0.6453774809837342 | loss/std: 0.05029413289989844 | lr: 0.02 | momentum: 0.9


6/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (6/10) loss: 0.6206818342208862 | loss/mean: 0.6206818342208862 | loss/std: 0.04108651105066992 | lr: 0.02 | momentum: 0.9
* Epoch (6/10) 


7/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (7/10) loss: 0.6292107760906219 | loss/mean: 0.6292107760906219 | loss/std: 0.04111408086737434 | lr: 0.02 | momentum: 0.9


7/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (7/10) loss: 0.6074053168296814 | loss/mean: 0.6074053168296814 | loss/std: 0.03379480886238085 | lr: 0.02 | momentum: 0.9
* Epoch (7/10) 


8/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (8/10) loss: 0.6185929417610168 | loss/mean: 0.6185929417610168 | loss/std: 0.03944040933507653 | lr: 0.02 | momentum: 0.9


8/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (8/10) loss: 0.6007710099220276 | loss/mean: 0.6007710099220276 | loss/std: 0.03189200853089671 | lr: 0.02 | momentum: 0.9
* Epoch (8/10) 


9/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (9/10) loss: 0.6171003401279449 | loss/mean: 0.6171003401279449 | loss/std: 0.04402393277607579 | lr: 0.02 | momentum: 0.9


9/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (9/10) loss: 0.5983243465423584 | loss/mean: 0.5983243465423584 | loss/std: 0.03124573356882136 | lr: 0.02 | momentum: 0.9
* Epoch (9/10) 


10/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (10/10) loss: 0.6102037250995636 | loss/mean: 0.6102037250995636 | loss/std: 0.039771009205711104 | lr: 0.02 | momentum: 0.9


10/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (10/10) loss: 0.5942969441413879 | loss/mean: 0.5942969441413879 | loss/std: 0.030156263301039248 | lr: 0.02 | momentum: 0.9
* Epoch (10/10) 
Top models:
experiments/model.0010.pth	10.0000


1/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (1/10) loss: 1.042102187871933 | loss/mean: 1.042102187871933 | loss/std: 0.07451054966978699 | lr: 0.02 | momentum: 0.9


1/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (1/10) loss: 0.7864793300628662 | loss/mean: 0.7864793300628662 | loss/std: 0.101153373968702 | lr: 0.02 | momentum: 0.9
* Epoch (1/10) 


2/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (2/10) loss: 0.8864562392234802 | loss/mean: 0.8864562392234802 | loss/std: 0.12172276097716445 | lr: 0.02 | momentum: 0.9


2/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (2/10) loss: 0.8513902306556702 | loss/mean: 0.8513902306556702 | loss/std: 0.20191115039505395 | lr: 0.02 | momentum: 0.9
* Epoch (2/10) 


3/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (3/10) loss: 0.8854352414608002 | loss/mean: 0.8854352414608002 | loss/std: 0.0981404531890589 | lr: 0.02 | momentum: 0.9


3/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (3/10) loss: 0.7628847479820251 | loss/mean: 0.7628847479820251 | loss/std: 0.10013412162241833 | lr: 0.02 | momentum: 0.9
* Epoch (3/10) 


4/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (4/10) loss: 0.769686883687973 | loss/mean: 0.769686883687973 | loss/std: 0.05839118213949724 | lr: 0.02 | momentum: 0.9


4/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (4/10) loss: 0.6547456979751587 | loss/mean: 0.6547456979751587 | loss/std: 0.05476750702766922 | lr: 0.02 | momentum: 0.9
* Epoch (4/10) 


5/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (5/10) loss: 0.719588679075241 | loss/mean: 0.719588679075241 | loss/std: 0.11699548793232997 | lr: 0.02 | momentum: 0.9


5/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (5/10) loss: 0.7627901434898376 | loss/mean: 0.7627901434898376 | loss/std: 0.1321442075439306 | lr: 0.02 | momentum: 0.9
* Epoch (5/10) 


6/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (6/10) loss: 0.6976754486560821 | loss/mean: 0.6976754486560821 | loss/std: 0.0934340152461809 | lr: 0.02 | momentum: 0.9


6/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (6/10) loss: 0.6239506721496582 | loss/mean: 0.6239506721496582 | loss/std: 0.08782115840860207 | lr: 0.02 | momentum: 0.9
* Epoch (6/10) 


7/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (7/10) loss: 0.7952917218208313 | loss/mean: 0.7952917218208313 | loss/std: 0.1809985184964377 | lr: 0.02 | momentum: 0.9


7/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (7/10) loss: 0.8714446067810059 | loss/mean: 0.8714446067810059 | loss/std: 0.17379308860509401 | lr: 0.02 | momentum: 0.9
* Epoch (7/10) 


8/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (8/10) loss: 1.0861395776271818 | loss/mean: 1.0861395776271818 | loss/std: 0.23493075856520065 | lr: 0.02 | momentum: 0.9


8/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (8/10) loss: 1.1714335203170776 | loss/mean: 1.1714335203170776 | loss/std: 0.18515588705445515 | lr: 0.02 | momentum: 0.9
* Epoch (8/10) 


9/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (9/10) loss: 1.241444742679596 | loss/mean: 1.241444742679596 | loss/std: 0.14529670255743823 | lr: 0.02 | momentum: 0.9


9/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (9/10) loss: 1.1714447021484375 | loss/mean: 1.1714447021484375 | loss/std: 0.1851640614732887 | lr: 0.02 | momentum: 0.9
* Epoch (9/10) 


10/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (10/10) loss: 1.2414447546005247 | loss/mean: 1.2414447546005247 | loss/std: 0.14529669509875237 | lr: 0.02 | momentum: 0.9


10/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (10/10) loss: 1.1714447021484375 | loss/mean: 1.1714447021484375 | loss/std: 0.1851640614732887 | lr: 0.02 | momentum: 0.9
* Epoch (10/10) 
Top models:
experiments/model.0010.pth	10.0000


1/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (1/10) loss: 1.028551608324051 | loss/mean: 1.028551608324051 | loss/std: 0.07843320353593367 | lr: 0.02 | momentum: 0.9


1/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (1/10) loss: 0.8010077118873596 | loss/mean: 0.8010077118873596 | loss/std: 0.07503513642231836 | lr: 0.02 | momentum: 0.9
* Epoch (1/10) 


2/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (2/10) loss: 0.8210979998111725 | loss/mean: 0.8210979998111725 | loss/std: 0.05601847918642516 | lr: 0.02 | momentum: 0.9


2/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (2/10) loss: 0.8905630350112915 | loss/mean: 0.8905630350112915 | loss/std: 0.11013348240287375 | lr: 0.02 | momentum: 0.9
* Epoch (2/10) 


3/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (3/10) loss: 0.9058371543884277 | loss/mean: 0.9058371543884277 | loss/std: 0.11157033186833144 | lr: 0.02 | momentum: 0.9


3/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (3/10) loss: 0.8515006899833679 | loss/mean: 0.8515006899833679 | loss/std: 0.202119488004248 | lr: 0.02 | momentum: 0.9
* Epoch (3/10) 


4/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (4/10) loss: 0.9089971899986267 | loss/mean: 0.9089971899986267 | loss/std: 0.10178938680479316 | lr: 0.02 | momentum: 0.9


4/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (4/10) loss: 0.8514438152313233 | loss/mean: 0.8514438152313233 | loss/std: 0.2020297442463405 | lr: 0.02 | momentum: 0.9
* Epoch (4/10) 


5/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (5/10) loss: 0.9014264345169067 | loss/mean: 0.9014264345169067 | loss/std: 0.1029729200719927 | lr: 0.02 | momentum: 0.9


5/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (5/10) loss: 0.8514398097991943 | loss/mean: 0.8514398097991943 | loss/std: 0.2020253670068299 | lr: 0.02 | momentum: 0.9
* Epoch (5/10) 


6/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (6/10) loss: 0.9008661985397339 | loss/mean: 0.9008661985397339 | loss/std: 0.1024013295188078 | lr: 0.02 | momentum: 0.9


6/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (6/10) loss: 0.8487295150756836 | loss/mean: 0.8487295150756836 | loss/std: 0.1989905462649412 | lr: 0.02 | momentum: 0.9
* Epoch (6/10) 


7/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (7/10) loss: 0.8322277128696443 | loss/mean: 0.8322277128696443 | loss/std: 0.08234812095124329 | lr: 0.02 | momentum: 0.9


7/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (7/10) loss: 0.6953249216079712 | loss/mean: 0.6953249216079712 | loss/std: 0.06973355072009223 | lr: 0.02 | momentum: 0.9
* Epoch (7/10) 


8/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (8/10) loss: 0.6915811598300934 | loss/mean: 0.6915811598300934 | loss/std: 0.06945356192315759 | lr: 0.02 | momentum: 0.9


8/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (8/10) loss: 0.6044048309326172 | loss/mean: 0.6044048309326172 | loss/std: 0.07595842888584106 | lr: 0.02 | momentum: 0.9
* Epoch (8/10) 


9/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (9/10) loss: 0.6582734167575838 | loss/mean: 0.6582734167575838 | loss/std: 0.10594252284976946 | lr: 0.02 | momentum: 0.9


9/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (9/10) loss: 0.6839355945587158 | loss/mean: 0.6839355945587158 | loss/std: 0.1168070292570471 | lr: 0.02 | momentum: 0.9
* Epoch (9/10) 


10/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (10/10) loss: 0.6127218782901763 | loss/mean: 0.6127218782901763 | loss/std: 0.06151893773915716 | lr: 0.02 | momentum: 0.9


10/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (10/10) loss: 0.5952696204185486 | loss/mean: 0.5952696204185486 | loss/std: 0.07733629271737244 | lr: 0.02 | momentum: 0.9
* Epoch (10/10) 
Top models:
experiments/model.0010.pth	10.0000


1/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (1/10) loss: 1.0652336716651913 | loss/mean: 1.0652336716651913 | loss/std: 0.03392065730787069 | lr: 0.02 | momentum: 0.9


1/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (1/10) loss: 0.9201510667800903 | loss/mean: 0.9201510667800903 | loss/std: 0.062232362982389156 | lr: 0.02 | momentum: 0.9
* Epoch (1/10) 


2/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (2/10) loss: 0.8438780903816223 | loss/mean: 0.8438780903816223 | loss/std: 0.055747641298231884 | lr: 0.02 | momentum: 0.9


2/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (2/10) loss: 0.7524623394012451 | loss/mean: 0.7524623394012451 | loss/std: 0.09126937094538058 | lr: 0.02 | momentum: 0.9
* Epoch (2/10) 


3/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (3/10) loss: 0.723194706439972 | loss/mean: 0.723194706439972 | loss/std: 0.04485077314436222 | lr: 0.02 | momentum: 0.9


3/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (3/10) loss: 0.6303250670433045 | loss/mean: 0.6303250670433045 | loss/std: 0.0540282371006904 | lr: 0.02 | momentum: 0.9
* Epoch (3/10) 


4/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (4/10) loss: 0.6246481001377105 | loss/mean: 0.6246481001377105 | loss/std: 0.04726309721077959 | lr: 0.02 | momentum: 0.9


4/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (4/10) loss: 0.6198530435562134 | loss/mean: 0.6198530435562134 | loss/std: 0.08066318731047406 | lr: 0.02 | momentum: 0.9
* Epoch (4/10) 


5/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (5/10) loss: 0.6117484986782074 | loss/mean: 0.6117484986782074 | loss/std: 0.06741337858423746 | lr: 0.02 | momentum: 0.9


5/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (5/10) loss: 0.6184580326080322 | loss/mean: 0.6184580326080322 | loss/std: 0.08377852782798285 | lr: 0.02 | momentum: 0.9
* Epoch (5/10) 


6/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (6/10) loss: 0.6201589107513428 | loss/mean: 0.6201589107513428 | loss/std: 0.05484140608025079 | lr: 0.02 | momentum: 0.9


6/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (6/10) loss: 0.5946909546852112 | loss/mean: 0.5946909546852112 | loss/std: 0.068792970105379 | lr: 0.02 | momentum: 0.9
* Epoch (6/10) 


7/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (7/10) loss: 0.6655430555343628 | loss/mean: 0.6655430555343628 | loss/std: 0.0843529238355563 | lr: 0.02 | momentum: 0.9


7/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (7/10) loss: 0.6155053973197937 | loss/mean: 0.6155053973197937 | loss/std: 0.0646987594436279 | lr: 0.02 | momentum: 0.9
* Epoch (7/10) 


8/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (8/10) loss: 0.5944459557533265 | loss/mean: 0.5944459557533265 | loss/std: 0.04532737899674422 | lr: 0.02 | momentum: 0.9


8/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (8/10) loss: 0.5791426301002502 | loss/mean: 0.5791426301002502 | loss/std: 0.04500185247262213 | lr: 0.02 | momentum: 0.9
* Epoch (8/10) 


9/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (9/10) loss: 0.6013752579689027 | loss/mean: 0.6013752579689027 | loss/std: 0.061679789716029644 | lr: 0.02 | momentum: 0.9


9/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (9/10) loss: 0.5849131226539612 | loss/mean: 0.5849131226539612 | loss/std: 0.03732484928772136 | lr: 0.02 | momentum: 0.9
* Epoch (9/10) 


10/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (10/10) loss: 0.6033385872840882 | loss/mean: 0.6033385872840882 | loss/std: 0.06560368877598852 | lr: 0.02 | momentum: 0.9


10/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (10/10) loss: 0.5877746701240539 | loss/mean: 0.5877746701240539 | loss/std: 0.03895815017278494 | lr: 0.02 | momentum: 0.9
* Epoch (10/10) 
Top models:
experiments/model.0010.pth	10.0000


1/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (1/10) loss: 1.075685405731201 | loss/mean: 1.075685405731201 | loss/std: 0.03178224726401924 | lr: 0.02 | momentum: 0.9


1/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (1/10) loss: 0.895913565158844 | loss/mean: 0.895913565158844 | loss/std: 0.06035832199513633 | lr: 0.02 | momentum: 0.9
* Epoch (1/10) 


2/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (2/10) loss: 0.8213372111320496 | loss/mean: 0.8213372111320496 | loss/std: 0.06998392927360224 | lr: 0.02 | momentum: 0.9


2/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (2/10) loss: 0.6649673342704773 | loss/mean: 0.6649673342704773 | loss/std: 0.058121312791242954 | lr: 0.02 | momentum: 0.9
* Epoch (2/10) 


3/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (3/10) loss: 0.6331037044525146 | loss/mean: 0.6331037044525146 | loss/std: 0.04670980227454376 | lr: 0.02 | momentum: 0.9


3/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (3/10) loss: 0.6122995257377625 | loss/mean: 0.6122995257377625 | loss/std: 0.04855859474421712 | lr: 0.02 | momentum: 0.9
* Epoch (3/10) 


4/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (4/10) loss: 0.6583375811576843 | loss/mean: 0.6583375811576843 | loss/std: 0.1062352413924385 | lr: 0.02 | momentum: 0.9


4/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (4/10) loss: 0.6220185875892639 | loss/mean: 0.6220185875892639 | loss/std: 0.10374422445263246 | lr: 0.02 | momentum: 0.9
* Epoch (4/10) 


5/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (5/10) loss: 0.6146392464637757 | loss/mean: 0.6146392464637757 | loss/std: 0.05761296953331951 | lr: 0.02 | momentum: 0.9


5/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (5/10) loss: 0.6057592868804932 | loss/mean: 0.6057592868804932 | loss/std: 0.09983176585190258 | lr: 0.02 | momentum: 0.9
* Epoch (5/10) 


6/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (6/10) loss: 0.6763321042060852 | loss/mean: 0.6763321042060852 | loss/std: 0.12083293098204732 | lr: 0.02 | momentum: 0.9


6/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (6/10) loss: 0.8512117505073548 | loss/mean: 0.8512117505073548 | loss/std: 0.20175280548470007 | lr: 0.02 | momentum: 0.9
* Epoch (6/10) 


7/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (7/10) loss: 0.9012219786643981 | loss/mean: 0.9012219786643981 | loss/std: 0.10288263399091986 | lr: 0.02 | momentum: 0.9


7/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (7/10) loss: 0.851444685459137 | loss/mean: 0.851444685459137 | loss/std: 0.20203052697338075 | lr: 0.02 | momentum: 0.9
* Epoch (7/10) 


8/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (8/10) loss: 0.9014447152614593 | loss/mean: 0.9014447152614593 | loss/std: 0.10298576927790941 | lr: 0.02 | momentum: 0.9


8/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (8/10) loss: 0.851444685459137 | loss/mean: 0.851444685459137 | loss/std: 0.20203052697338075 | lr: 0.02 | momentum: 0.9
* Epoch (8/10) 


9/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (9/10) loss: 0.9014447271823883 | loss/mean: 0.9014447271823883 | loss/std: 0.10298576343178274 | lr: 0.02 | momentum: 0.9


9/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (9/10) loss: 0.851444685459137 | loss/mean: 0.851444685459137 | loss/std: 0.20203052697338075 | lr: 0.02 | momentum: 0.9
* Epoch (9/10) 


10/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (10/10) loss: 0.9014447271823883 | loss/mean: 0.9014447271823883 | loss/std: 0.10298576343178274 | lr: 0.02 | momentum: 0.9


10/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (10/10) loss: 0.851444685459137 | loss/mean: 0.851444685459137 | loss/std: 0.20203052697338075 | lr: 0.02 | momentum: 0.9
* Epoch (10/10) 
Top models:
experiments/model.0010.pth	10.0000


1/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (1/10) loss: 1.0962778449058532 | loss/mean: 1.0962778449058532 | loss/std: 0.027680003802684178 | lr: 0.02 | momentum: 0.9


1/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (1/10) loss: 1.0453269958496094 | loss/mean: 1.0453269958496094 | loss/std: 0.035175976124892004 | lr: 0.02 | momentum: 0.9
* Epoch (1/10) 


2/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (2/10) loss: 0.9754522919654844 | loss/mean: 0.9754522919654844 | loss/std: 0.05695535879922668 | lr: 0.02 | momentum: 0.9


2/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (2/10) loss: 0.8232440948486328 | loss/mean: 0.8232440948486328 | loss/std: 0.07057036592329 | lr: 0.02 | momentum: 0.9
* Epoch (2/10) 


3/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (3/10) loss: 0.7767148554325104 | loss/mean: 0.7767148554325104 | loss/std: 0.04823939908546852 | lr: 0.02 | momentum: 0.9


3/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (3/10) loss: 0.6781964421272277 | loss/mean: 0.6781964421272277 | loss/std: 0.060763248053669956 | lr: 0.02 | momentum: 0.9
* Epoch (3/10) 


4/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (4/10) loss: 0.6582090258598328 | loss/mean: 0.6582090258598328 | loss/std: 0.03622252899968127 | lr: 0.02 | momentum: 0.9


4/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (4/10) loss: 0.6289604783058167 | loss/mean: 0.6289604783058167 | loss/std: 0.07010127250751018 | lr: 0.02 | momentum: 0.9
* Epoch (4/10) 


5/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (5/10) loss: 0.6174744248390197 | loss/mean: 0.6174744248390197 | loss/std: 0.03853228873084861 | lr: 0.02 | momentum: 0.9


5/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (5/10) loss: 0.6096050858497619 | loss/mean: 0.6096050858497619 | loss/std: 0.07129125460186055 | lr: 0.02 | momentum: 0.9
* Epoch (5/10) 


6/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (6/10) loss: 0.6001222729682922 | loss/mean: 0.6001222729682922 | loss/std: 0.048040824024589326 | lr: 0.02 | momentum: 0.9


6/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (6/10) loss: 0.6058455824851989 | loss/mean: 0.6058455824851989 | loss/std: 0.07370464744591872 | lr: 0.02 | momentum: 0.9
* Epoch (6/10) 


7/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (7/10) loss: 0.6184334397315979 | loss/mean: 0.6184334397315979 | loss/std: 0.06254564528301507 | lr: 0.02 | momentum: 0.9


7/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (7/10) loss: 0.6231663346290588 | loss/mean: 0.6231663346290588 | loss/std: 0.08440586053255185 | lr: 0.02 | momentum: 0.9
* Epoch (7/10) 


8/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (8/10) loss: 0.6528749883174896 | loss/mean: 0.6528749883174896 | loss/std: 0.07413754899420924 | lr: 0.02 | momentum: 0.9


8/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (8/10) loss: 0.6139261841773986 | loss/mean: 0.6139261841773986 | loss/std: 0.05789440845061788 | lr: 0.02 | momentum: 0.9
* Epoch (8/10) 


9/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (9/10) loss: 0.6003472089767456 | loss/mean: 0.6003472089767456 | loss/std: 0.03392144681106708 | lr: 0.02 | momentum: 0.9


9/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (9/10) loss: 0.578070855140686 | loss/mean: 0.578070855140686 | loss/std: 0.032555432216101125 | lr: 0.02 | momentum: 0.9
* Epoch (9/10) 


10/10 * Epoch (train):   0%|          | 0/10 [00:00<?, ?it/s]

train (10/10) loss: 0.6397575318813323 | loss/mean: 0.6397575318813323 | loss/std: 0.08284234013907164 | lr: 0.02 | momentum: 0.9


10/10 * Epoch (valid):   0%|          | 0/5 [00:00<?, ?it/s]

valid (10/10) loss: 0.5987631440162658 | loss/mean: 0.5987631440162658 | loss/std: 0.047015668781552104 | lr: 0.02 | momentum: 0.9
* Epoch (10/10) 
Top models:
experiments/model.0010.pth	10.0000
