In [1]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import argparse

In [2]:
def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--conv-in", type=int, default=4, help="Input sequence features"
    )
    parser.add_argument("--batch_size", type=int, default=512, required=False)
    parser.add_argument("--step_interval", type=int, default=100, required=False)
    parser.add_argument("--lr", type=float, default=0.001, required=False)
    parser.add_argument(
        "--train-file",
        type=str,
        help="feature file used in trainning",
        default="/home/xiaoyf/methylation/deepsignal/log/feature.txt",
    )
    parser.add_argument(
        "--model_dir", type=str, default="/home/xiaoyf/methylation/deepsignal/log/"
    )
    parser.add_argument(
        "--max_epoch_num",
        action="store",
        default=10,
        type=int,
        required=False,
        help="max epoch num, default 10",
    )
    parser.add_argument(
        "--min_epoch_num",
        action="store",
        default=5,
        type=int,
        required=False,
        help="min epoch num, default 5",
    )
    return parser.parse_args([])

In [3]:
def squash(tensor, dim=-1):
    squared_norm = (tensor**2).sum(dim=dim, keepdim=True)
    scale = squared_norm / (1 + squared_norm)
    return scale * tensor / torch.sqrt(squared_norm)

In [4]:
class Squash(nn.Module):
    def __init__(self, eps=10e-21, **kwargs):
        super(Squash, self).__init__(**kwargs)
        self.eps = eps

    def forward(self, s):
        n = torch.norm(s, dim=-1, keepdim=True)
        return (1 - 1 / (torch.exp(n) + self.eps)) * (s / (n + self.eps))

In [112]:
def dynamic_routing(x, iterations=3):
    #x = x.unsqueeze(-1)
    N = x.shape[1]  # num_caps
    N1 = x.shape[2]  # in_caps
    B = x.shape[0]
    # feature_dim = x.shape[2]
    #x:batch_size, num_caps, in_caps, out_channels
    b = torch.zeros(B, N, N1,1).to(x.device)#batch_size, num_caps, in_caps
    for _ in range(iterations):
        #print('input x\'s batch_size: {}, num_caps: {}, in_caps: {}, out_channels: {}'.format(x.shape[0], x.shape[1], x.shape[2], x.shape[3]))
        c = F.softmax(b, dim=1)#Softmax along num_caps
        #batch_size, num_caps,caps_dim
        #print('softmax result\'s batch_size: {}, num_caps: {}, in_caps: {}, softmax_result: {}'.format(c.shape[0], c.shape[1], c.shape[2], c.shape[3]))
        a = c*x
        #print('a\'s batch_size: {}, num_caps: {}, in_caps: {}, out_channels: {}'.format(a.shape[0], a.shape[1], a.shape[2], a.shape[3]))
        s = torch.sum(a, dim=2).squeeze(-1)#sum across in_caps
        #print('s\'s batch_size: {}, num_caps: {}, out_channels: {}'.format(s.shape[0], s.shape[1], s.shape[2]))
        v = squash(s)# apply "squashing" non-linearity along out_channels
        #print('v\'s batch_size: {}, num_caps: {}, out_channels: {}'.format(v.shape[0], v.shape[1], v.shape[2]))
        #print('x shape: {}'.format(x.shape))
        y = torch.matmul(x,v.unsqueeze(-1))
        #print('y shape: {}'.format(y.shape))
        #print('b shape: {}'.format(b.shape))
        b = b + y

    return v

In [110]:
#x:batch_size, num_caps, in_caps, out_channels
input_tensor=torch.randn(2, 2, 5,205)
print(dynamic_routing(input_tensor).shape)

input x's batch_size: 2, num_caps: 2, in_caps: 5, out_channels: 205
softmax result's batch_size: 2, num_caps: 2, in_caps: 5, softmax_result: 1
a's batch_size: 2, num_caps: 2, in_caps: 5, out_channels: 205
s's batch_size: 2, num_caps: 2, out_channels: 205
v's batch_size: 2, num_caps: 2, out_channels: 205
x shape: torch.Size([2, 2, 5, 205])
y shape: torch.Size([2, 2, 5, 1])
b shape: torch.Size([2, 2, 5, 1])
input x's batch_size: 2, num_caps: 2, in_caps: 5, out_channels: 205
softmax result's batch_size: 2, num_caps: 2, in_caps: 5, softmax_result: 1
a's batch_size: 2, num_caps: 2, in_caps: 5, out_channels: 205
s's batch_size: 2, num_caps: 2, out_channels: 205
v's batch_size: 2, num_caps: 2, out_channels: 205
x shape: torch.Size([2, 2, 5, 205])
y shape: torch.Size([2, 2, 5, 1])
b shape: torch.Size([2, 2, 5, 1])
input x's batch_size: 2, num_caps: 2, in_caps: 5, out_channels: 205
softmax result's batch_size: 2, num_caps: 2, in_caps: 5, softmax_result: 1
a's batch_size: 2, num_caps: 2, in_caps

In [None]:
import torch.nn as nn

# Define the input and output channels
in_channels = 2
out_channels = 1

# Define the kernel size and dilation
kernel_size = 2

# Define the 1D dilated convolution layers
conv1d_list = nn.ModuleList()
for dilation in range(1, 6):
    padding = "same"
    conv1d_list.append(
        nn.Conv1d(
            in_channels, out_channels, kernel_size, dilation=dilation, padding=padding
        )
    )

# Define the input tensor
input_tensor = torch.randn(1, in_channels, 21 * 5)

# Apply the 1D dilated convolutions to the input tensor
output_tensor_list = []
for conv1d in conv1d_list:
    print(conv1d(input_tensor).shape)
    output_tensor_list.append(conv1d(input_tensor))

# Concatenate the output tensors along the channel dimension
output_tensor = torch.cat(output_tensor_list, dim=1)

print(output_tensor.shape)
print(dynamic_routing(output_tensor).shape)

In [16]:
c = F.softmax(torch.zeros(1, 1, 5, 1, 1), dim=1)
# print(c)
print(c.shape)
x = torch.randn(1, 5, 21 * 5, 1)
a = x.matmul(c)
# print(a)
print(a.shape)
s = torch.sum(x.matmul(c), dim=2).squeeze(-1)
# print(s)
print(s.shape)

torch.Size([1, 1, 5, 1, 1])
torch.Size([1, 1, 5, 105, 1])
torch.Size([1, 1, 105])


In [127]:
class PrimaryCapsuleLayer(nn.Module):
    """
    Create a primary capsule layer with the methodology described in 'Efficient-CapsNet: Capsule Network with Self-Attention Routing'.
    Properties of each capsule s_n are exatracted using a 1D depthwise convolution.

    ...

    Attributes
    ----------
    kernel_size[w]: int
        depthwise conv kernel dimension
    conv_num: int
        number of primary capsules
    feature_dimension: int
        primary capsules dimension (number of properties)
    conv_stride: int
        depthwise conv strides
    Methods
    -------
    forward(inputs)
        compute the primary capsule layer
    """

    def __init__(
        self,
        conv_in=2,
        feature_dimension=21 * 5,
        kernel_size=2,
        conv_num=5,
        base_num=21,
    ):
        super().__init__()
        
        self.conv_out = feature_dimension // (conv_num * base_num)
        self.conv_num = conv_num
        self.primary_capsule_layer = nn.ModuleList(
            [
                nn.Conv1d(
                    conv_in,
                    self.conv_out,
                    kernel_size,
                    dilation=conv_stride,
                    padding="same",
                )
                for conv_stride in range(1, conv_num + 1)
            ]
        )

    def forward(self, x):
        
        #print('input feature shape: {}'.format(x.shape))
        capsules = [conv(x) for conv in self.primary_capsule_layer]
        # capsules_reshaped = [
        #    c.reshape(self.conv_num, self.feature_dimension) for c in capsules
        # ]
        output_tensor = torch.cat(capsules, dim=1)
        return Squash()(output_tensor)


def test_for_primary_capsule_layer():
    input = torch.rand(1, 2, 105)
    layer = PrimaryCapsuleLayer()
    print(layer(input).shape)


test_for_primary_capsule_layer()

torch.Size([1, 5, 105])


In [113]:
class CapsLayer(nn.Module):
    def __init__(
        self, num_capsules=1, in_caps=5, in_channels=105, out_channels=2
    ):
        super(CapsLayer, self).__init__()
        self.W = nn.Parameter(
            0.01
            * torch.randn(1, num_capsules, in_caps, out_channels, in_channels)
        )
        # print('W shape: {}'.format(self.W.shape))

    def forward(self, x):
        #print('CapsLayer input shape: {}'.format(x.shape))
        x = x[:, None, ..., None]  # x.unsqueeze(1).unsqueeze(4)
        # x = x.unsqueeze(-1)
        #print('W shape: {}'.format(self.W.shape))
        #print('CapsLayer input shape: {}'.format(x.shape))
        # print('CapsLayer input expand shape: {}'.format(x[ :, :, None, :].shape))
        # (batch_size, num_caps, num_route_nodes, out_channels, 1)
        # print('x shape: {}'.format(x.shape))
        u_hat = torch.matmul(self.W, x)  # (x @ self.W).squeeze(2)
        # u=u_hat.squeeze(-1)
        u_hat = u_hat.squeeze(-1)
        #batch_size, num_caps, in_caps, out_channels
        #print('u_hat\'s batch_size: {}, num_caps: {}, in_caps: {}, out_channels: {}'.format(u_hat.shape[0], u_hat.shape[1], u_hat.shape[2], u_hat.shape[3]))
        class_capsules = dynamic_routing(u_hat)
        return class_capsules

In [54]:
a = torch.rand(1, 10, 10, 20, 1)
b = torch.rand(1, 1, 10, 1, 1)
c = a.matmul(b)

In [107]:
input = torch.rand(2, 5, 105)
layer = CapsLayer()
print(layer(input).shape)

CapsLayer input shape: torch.Size([2, 5, 105])
W shape: torch.Size([1, 1, 5, 2, 105])
CapsLayer input shape: torch.Size([2, 1, 5, 105, 1])
u_hat's batch_size: 2, num_caps: 1, in_caps: 5, out_channels: 2
input x's batch_size: 2, num_caps: 1, in_caps: 5, out_channels: 2
softmax result's batch_size: 2, num_caps: 1, in_caps: 5, softmax_result: 1
a's batch_size: 2, num_caps: 1, in_caps: 5, out_channels: 2
s's batch_size: 2, num_caps: 1, out_channels: 2
v's batch_size: 2, num_caps: 1, out_channels: 2
x shape: torch.Size([2, 1, 5, 2])
y shape: torch.Size([2, 1, 5, 1])
b shape: torch.Size([2, 1, 5, 1])
input x's batch_size: 2, num_caps: 1, in_caps: 5, out_channels: 2
softmax result's batch_size: 2, num_caps: 1, in_caps: 5, softmax_result: 1
a's batch_size: 2, num_caps: 1, in_caps: 5, out_channels: 2
s's batch_size: 2, num_caps: 1, out_channels: 2
v's batch_size: 2, num_caps: 1, out_channels: 2
x shape: torch.Size([2, 1, 5, 2])
y shape: torch.Size([2, 1, 5, 1])
b shape: torch.Size([2, 1, 5, 1])

In [139]:
class CapsNet(nn.Module):
    def __init__(self,vocab_size=4,embedding_size=1):
        super(CapsNet, self).__init__()
        self.embed = nn.Embedding(vocab_size, embedding_size)
        self.primary_layer = PrimaryCapsuleLayer()
        self.caps_layer = CapsLayer()
        self.softmax = nn.Softmax(1)

    def forward(self, seq,sig):
        seq_emb = self.embed(seq.long())
        seq_emb = seq_emb.reshape(seq_emb.shape[0], 1, -1)
        sig = sig.reshape(sig.shape[0], 1, -1)
        #print('seq_emb shape: {}'.format(seq_emb.shape))
        #print('sig shape: {}'.format(sig.shape))
        x = torch.cat((seq_emb,sig), dim=1)
        #seq = self.primary_layer(seq)
        #seq = self.caps_layer(seq)
        #sig = self.primary_layer(sig)
        #sig = self.caps_layer(sig)
        x = self.primary_layer(x)
        x = self.caps_layer(x)
        x = torch.norm(x,dim=-1)
        return x


def test_for_caps_net():
    input1 = torch.rand(1, 1, 105)
    input2 = torch.rand(1, 1, 105)
    
    model = CapsNet()
    #print(model(input1,input2).shape)


test_for_caps_net()

In [141]:
class CapsuleLoss(nn.Module):
    def __init__(self):
        super(CapsuleLoss, self).__init__()

    def forward(self, classes, labels):
        #classes = classes.reshape(classes.shape[0], 2)
        labels = labels.reshape(labels.shape[0], 1)
        #print('classes shape: {}'.format(classes.shape))
        #print('labels shape: {}'.format(labels.shape))
        left = F.relu(0.9 - classes, inplace=True) ** 2
        #print('left shape: {}'.format(left.shape))
        right = F.relu(classes - 0.1, inplace=True) ** 2
        #print('right shape: {}'.format(right.shape))

        margin_loss = labels * left + 0.5 * (1.0 - labels) * right
        margin_loss = margin_loss.sum()
        return margin_loss

In [17]:
np.random.randint(0, 2)

1

In [49]:
from torch.utils.data import Dataset
import linecache

base2code_dna = {"A": 0, "C": 1, "G": 2, "T": 3, "N": 4}
code2base_dna = {0: "A", 1: "C", 2: "G", 3: "T", 4: "N"}


def clear_linecache():
    # linecache should be treated carefully
    linecache.clearcache()


def parse_a_line2(line):
    words = line.strip().split("\t")

    seq = np.array(
        [[base2code_dna[y] for y in x.split(",")] for x in words[1].split(";")]
    )
    signal = np.array(
        [[np.float16(y) for y in x.split(",")] for x in words[2].split(";")]
    )
    label = np.random.randint(0, 2)

    return seq, signal, label


class SignalDataset(Dataset):
    def __init__(self, filename, transform=None):
        # print(">>>using linecache to access '{}'<<<\n"
        #       ">>>after done using the file, "
        #       "remember to use linecache.clearcache() to clear cache for safety<<<".format(filename))
        self._filename = os.path.abspath(filename)
        self._total_data = 0
        self._transform = transform
        with open(filename, "r") as f:
            self._total_data = len(f.readlines())

    def __getitem__(self, idx):
        line = linecache.getline(self._filename, idx + 1)
        if line == "":
            return None
        else:
            output = parse_a_line2(line)
            if self._transform is not None:
                output = self._transform(output)
            return output

    def __len__(self):
        return self._total_data

In [144]:
import sys
from torch.optim.lr_scheduler import StepLR
import time
from sklearn import metrics
import re

if __name__ == "__main__":
    total_start = time.time()
    args = parse_args()
    train_dataset = SignalDataset(args.train_file)
    train_loader = torch.utils.data.DataLoader(
        dataset=train_dataset, batch_size=args.batch_size, shuffle=False
    )
    total_step = len(train_loader)
    valid_loader = torch.utils.data.DataLoader(
        dataset=train_dataset, batch_size=args.batch_size, shuffle=True
    )
    model = CapsNet()
    criterion = CapsuleLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    scheduler = StepLR(optimizer, step_size=2, gamma=0.1)
    curr_best_accuracy = 0
    model_dir = args.model_dir
    if model_dir != "/":
        model_dir = os.path.abspath(model_dir).rstrip("/")
        if not os.path.exists(model_dir):
            os.makedirs(model_dir)
        else:
            model_regex = re.compile(
                r"" + "\.b\d+_s\d+_epoch\d+\.ckpt*"
            )
            for mfile in os.listdir(model_dir):
                if model_regex.match(mfile):
                    os.remove(model_dir + "/" + mfile)
        model_dir += "/"
    model.train()
    for epoch in range(args.max_epoch_num):
        curr_best_accuracy_epoch = 0
        no_best_model = True
        tlosses = []
        start = time.time()
        for i, sfeatures in enumerate(train_loader):
            (seq, signal, labels) = sfeatures
            outputs = model(seq, signal)
            loss = criterion(outputs, labels)
            tlosses.append(loss.detach().item())

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
            optimizer.step()
            if (i + 1) % args.step_interval == 0 or i == total_step - 1:
                model.eval()
                with torch.no_grad():
                    vlosses, vlabels_total, vpredicted_total = [], [], []
                    for vi, vsfeatures in enumerate(valid_loader):
                        (
                            vseq,
                            vsignal,
                            vlabels,
                        ) = vsfeatures
                        voutputs = model(vseq, vsignal)
                        vloss = criterion(voutputs, vlabels)

                        _, vpredicted = torch.max(voutputs.data, 1)
                        #print(vpredicted)
                        vlosses.append(vloss.item())
                        vlabels_total += vlabels.tolist()
                        vpredicted_total += vpredicted.tolist()
                        v_accuracy = metrics.accuracy_score(
                            vlabels_total, vpredicted_total
                        )
                        v_precision = metrics.precision_score(
                            vlabels_total, vpredicted_total
                        )
                        v_recall = metrics.recall_score(vlabels_total, vpredicted_total)
                        if v_accuracy > curr_best_accuracy_epoch:
                            curr_best_accuracy_epoch = v_accuracy
                            if curr_best_accuracy_epoch > curr_best_accuracy - 0.0002:
                                torch.save(
                                    model.state_dict(),
                                    model_dir
                                    + ".epoch{}.ckpt".format(
                                        epoch
                                    ),
                                )
                                if curr_best_accuracy_epoch > curr_best_accuracy:
                                    curr_best_accuracy = curr_best_accuracy_epoch
                                    no_best_model = False
                        time_cost = time.time() - start
                        print(
                            "Epoch [{}/{}], Step [{}/{}], TrainLoss: {:.4f}; "
                            "ValidLoss: {:.4f}, "
                            "Accuracy: {:.4f}, Precision: {:.4f}, Recall: {:.4f}, "
                            "curr_epoch_best_accuracy: {:.4f}; Time: {:.2f}s".format(
                                epoch + 1,
                                args.max_epoch_num,
                                i + 1,
                                total_step,
                                np.mean(tlosses),
                                np.mean(vlosses),
                                v_accuracy,
                                v_precision,
                                v_recall,
                                curr_best_accuracy_epoch,
                                time_cost,
                            )
                        )
                        tlosses = []
                        start = time.time()
                        sys.stdout.flush()
                    model.train()
            scheduler.step()
            if no_best_model and epoch >= args.min_epoch_num - 1:
                print("early stop!")
                break
        endtime = time.time()
        clear_linecache()
        print(
            "[main] train costs {} seconds, "
            "best accuracy: {}".format(endtime - total_start, curr_best_accuracy)
        )

Epoch [1/10], Step [100/1421], TrainLoss: 196.5193; ValidLoss: 206.4992, Accuracy: 0.4727, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.4727; Time: 14.27s


  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 200.0250, Accuracy: 0.4893, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.4893; Time: 0.28s
Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 199.1698, Accuracy: 0.4915, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.4915; Time: 0.09s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 197.8267, Accuracy: 0.4951, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.4951; Time: 0.08s
Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 198.7946, Accuracy: 0.4926, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.4951; Time: 0.11s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 197.4223, Accuracy: 0.4961, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.4961; Time: 0.22s
Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.7640, Accuracy: 0.4978, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.4978; Time: 0.11s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9871, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.4998; Time: 0.17s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0645, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.4998; Time: 0.25s
Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0504, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.4998; Time: 0.09s
Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.1408, Accuracy: 0.5020, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5020; Time: 0.10s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.7633, Accuracy: 0.5003, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5020; Time: 0.09s
Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.9381, Accuracy: 0.5024, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5024; Time: 0.15s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.5843, Accuracy: 0.5033, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5033; Time: 0.24s
Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.7501, Accuracy: 0.5004, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5033; Time: 0.10s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.3760, Accuracy: 0.5013, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5033; Time: 0.11s
Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2557, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5033; Time: 0.12s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9057, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5033; Time: 0.21s
Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.6993, Accuracy: 0.5005, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5033; Time: 0.14s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.5938, Accuracy: 0.5008, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5033; Time: 0.13s
Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.4243, Accuracy: 0.5012, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5033; Time: 0.12s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.3753, Accuracy: 0.5013, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5033; Time: 0.14s
Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.0289, Accuracy: 0.5022, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5033; Time: 0.12s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.9405, Accuracy: 0.5024, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5033; Time: 0.13s
Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.3321, Accuracy: 0.5040, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5040; Time: 0.15s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.1548, Accuracy: 0.5044, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5044; Time: 0.18s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.0550, Accuracy: 0.5047, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5047; Time: 0.18s
Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.0122, Accuracy: 0.5048, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5048; Time: 0.15s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.3956, Accuracy: 0.5038, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5048; Time: 0.13s
Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.6495, Accuracy: 0.5032, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5048; Time: 0.14s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.7645, Accuracy: 0.5029, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5048; Time: 0.16s
Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.5385, Accuracy: 0.5035, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5048; Time: 0.14s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.6446, Accuracy: 0.5032, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5048; Time: 0.14s
Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.7186, Accuracy: 0.5030, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5048; Time: 0.15s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.4267, Accuracy: 0.5037, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5048; Time: 0.15s
Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.4877, Accuracy: 0.5036, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5048; Time: 0.16s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.8559, Accuracy: 0.5026, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5048; Time: 0.15s
Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.8647, Accuracy: 0.5026, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5048; Time: 0.16s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.9512, Accuracy: 0.5024, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5048; Time: 0.15s
Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.7070, Accuracy: 0.5030, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5048; Time: 0.17s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.5486, Accuracy: 0.5034, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5048; Time: 0.16s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.6742, Accuracy: 0.5031, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5048; Time: 0.21s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.2003, Accuracy: 0.5043, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5048; Time: 0.25s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.2052, Accuracy: 0.5043, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5048; Time: 0.27s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.0905, Accuracy: 0.5046, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5048; Time: 0.31s
Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.0307, Accuracy: 0.5048, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5048; Time: 0.16s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 193.8930, Accuracy: 0.5051, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5051; Time: 0.17s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 193.8219, Accuracy: 0.5053, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.26s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 193.9598, Accuracy: 0.5049, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.26s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.0142, Accuracy: 0.5048, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.33s
Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 193.9586, Accuracy: 0.5049, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.15s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.0520, Accuracy: 0.5047, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.25s
Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.4185, Accuracy: 0.5038, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.16s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.3319, Accuracy: 0.5040, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.19s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.1815, Accuracy: 0.5044, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.23s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.3109, Accuracy: 0.5040, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.30s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.6480, Accuracy: 0.5032, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.34s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.6994, Accuracy: 0.5031, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.17s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.8879, Accuracy: 0.5026, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.34s
Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.0569, Accuracy: 0.5021, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.17s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.0953, Accuracy: 0.5020, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.20s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.1795, Accuracy: 0.5018, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.21s
Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.9707, Accuracy: 0.5024, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.20s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.8415, Accuracy: 0.5027, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.32s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.9881, Accuracy: 0.5023, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.17s
Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.0263, Accuracy: 0.5022, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.18s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.9582, Accuracy: 0.5024, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.19s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.8135, Accuracy: 0.5028, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.20s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.7983, Accuracy: 0.5028, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.22s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.9032, Accuracy: 0.5025, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.22s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.9593, Accuracy: 0.5024, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.20s
Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.0072, Accuracy: 0.5023, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.19s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.0162, Accuracy: 0.5022, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.39s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.9358, Accuracy: 0.5025, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.30s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.8163, Accuracy: 0.5028, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.31s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.9210, Accuracy: 0.5025, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.32s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.9555, Accuracy: 0.5024, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.28s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.8499, Accuracy: 0.5027, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.23s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.8526, Accuracy: 0.5027, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.29s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.7022, Accuracy: 0.5031, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.24s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.7159, Accuracy: 0.5030, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.22s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.8978, Accuracy: 0.5025, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.27s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.9373, Accuracy: 0.5024, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.28s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.8308, Accuracy: 0.5027, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.31s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.7974, Accuracy: 0.5028, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.33s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.8647, Accuracy: 0.5026, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.39s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.7697, Accuracy: 0.5029, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.36s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.7570, Accuracy: 0.5029, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.39s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.7198, Accuracy: 0.5030, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.27s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.6483, Accuracy: 0.5032, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.28s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.7467, Accuracy: 0.5029, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.25s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.9162, Accuracy: 0.5025, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.27s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.9014, Accuracy: 0.5025, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.24s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.7738, Accuracy: 0.5029, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.27s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.7466, Accuracy: 0.5029, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.35s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.6155, Accuracy: 0.5033, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.31s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.5969, Accuracy: 0.5033, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.28s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.6411, Accuracy: 0.5032, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.37s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.6541, Accuracy: 0.5032, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.40s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.7357, Accuracy: 0.5030, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.39s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.6702, Accuracy: 0.5031, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.29s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.6373, Accuracy: 0.5032, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.38s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.5084, Accuracy: 0.5035, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.28s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.5288, Accuracy: 0.5035, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.24s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.7316, Accuracy: 0.5030, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.41s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.7786, Accuracy: 0.5029, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.29s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.7319, Accuracy: 0.5030, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.32s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.8151, Accuracy: 0.5028, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.26s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.7050, Accuracy: 0.5030, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.25s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.7022, Accuracy: 0.5031, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.29s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.9048, Accuracy: 0.5025, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.32s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.9476, Accuracy: 0.5024, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.30s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.0378, Accuracy: 0.5022, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.28s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.0658, Accuracy: 0.5021, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.28s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.0466, Accuracy: 0.5022, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.30s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.9284, Accuracy: 0.5025, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.33s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.8188, Accuracy: 0.5028, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.28s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.6912, Accuracy: 0.5031, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.28s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.6685, Accuracy: 0.5031, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.29s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.6919, Accuracy: 0.5031, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.40s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.7337, Accuracy: 0.5030, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.29s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.7486, Accuracy: 0.5029, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.33s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.7401, Accuracy: 0.5030, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.28s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.8473, Accuracy: 0.5027, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.29s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.7987, Accuracy: 0.5028, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.34s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.7583, Accuracy: 0.5029, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.28s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.7788, Accuracy: 0.5028, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.29s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.7155, Accuracy: 0.5030, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.35s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.6527, Accuracy: 0.5032, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.30s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.6852, Accuracy: 0.5031, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.30s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.6475, Accuracy: 0.5032, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.45s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.5700, Accuracy: 0.5034, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.31s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.7926, Accuracy: 0.5028, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.32s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.7777, Accuracy: 0.5028, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.46s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.8761, Accuracy: 0.5026, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.38s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.8955, Accuracy: 0.5025, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.38s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.9247, Accuracy: 0.5025, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.47s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.0042, Accuracy: 0.5023, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.36s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 194.9500, Accuracy: 0.5024, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.42s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.0069, Accuracy: 0.5023, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.36s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.0511, Accuracy: 0.5021, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.55s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.0401, Accuracy: 0.5022, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.50s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.0357, Accuracy: 0.5022, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.38s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.0631, Accuracy: 0.5021, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.38s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.0579, Accuracy: 0.5021, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.39s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.1171, Accuracy: 0.5020, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.30s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.2059, Accuracy: 0.5018, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.31s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.2769, Accuracy: 0.5016, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.37s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.2241, Accuracy: 0.5017, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.48s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.3101, Accuracy: 0.5015, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.38s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.3346, Accuracy: 0.5014, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.43s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.2674, Accuracy: 0.5016, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.39s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.3120, Accuracy: 0.5015, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.46s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.2880, Accuracy: 0.5015, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.38s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.2567, Accuracy: 0.5016, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.43s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.1775, Accuracy: 0.5018, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.55s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.1921, Accuracy: 0.5018, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.59s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.2247, Accuracy: 0.5017, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.48s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.2775, Accuracy: 0.5016, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.41s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.2949, Accuracy: 0.5015, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.42s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.2752, Accuracy: 0.5016, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.41s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.3024, Accuracy: 0.5015, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.39s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.3678, Accuracy: 0.5013, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.38s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.3616, Accuracy: 0.5014, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.42s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.3003, Accuracy: 0.5015, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.46s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.3403, Accuracy: 0.5014, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.42s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.4221, Accuracy: 0.5012, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.36s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.3204, Accuracy: 0.5015, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.39s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.2732, Accuracy: 0.5016, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.40s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.3766, Accuracy: 0.5013, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.40s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.3983, Accuracy: 0.5013, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.47s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.4550, Accuracy: 0.5011, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.55s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.4758, Accuracy: 0.5011, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.41s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.5529, Accuracy: 0.5009, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.42s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.5214, Accuracy: 0.5010, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.38s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.5063, Accuracy: 0.5010, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.34s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.4481, Accuracy: 0.5011, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.42s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.4881, Accuracy: 0.5010, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.39s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.5153, Accuracy: 0.5010, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.46s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.5344, Accuracy: 0.5009, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.47s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.5528, Accuracy: 0.5009, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.50s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.5213, Accuracy: 0.5010, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.41s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.5400, Accuracy: 0.5009, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.50s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.5464, Accuracy: 0.5009, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.43s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.5984, Accuracy: 0.5008, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.39s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.6254, Accuracy: 0.5007, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.45s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.6232, Accuracy: 0.5007, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.41s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.6206, Accuracy: 0.5007, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.55s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.6711, Accuracy: 0.5006, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.63s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.6203, Accuracy: 0.5007, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.55s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.6453, Accuracy: 0.5006, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.43s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.6699, Accuracy: 0.5006, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.58s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.6838, Accuracy: 0.5005, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.48s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.6345, Accuracy: 0.5007, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.56s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.7215, Accuracy: 0.5005, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.59s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.7226, Accuracy: 0.5004, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.67s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.7204, Accuracy: 0.5005, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.57s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.7721, Accuracy: 0.5003, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.63s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.7190, Accuracy: 0.5005, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.48s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.7278, Accuracy: 0.5004, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.55s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.7867, Accuracy: 0.5003, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.42s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8327, Accuracy: 0.5002, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.50s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8130, Accuracy: 0.5002, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.46s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8169, Accuracy: 0.5002, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.60s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8280, Accuracy: 0.5002, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.43s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8938, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.51s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8760, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.49s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8344, Accuracy: 0.5002, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.48s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8311, Accuracy: 0.5002, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.50s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8095, Accuracy: 0.5002, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.58s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.7669, Accuracy: 0.5003, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.61s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.7894, Accuracy: 0.5003, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.46s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8480, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.44s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8479, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.49s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8763, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.44s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8241, Accuracy: 0.5002, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.56s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8389, Accuracy: 0.5002, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.45s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8641, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.63s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8465, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.57s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8432, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.53s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.7854, Accuracy: 0.5003, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.53s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8172, Accuracy: 0.5002, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.49s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8655, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.51s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8386, Accuracy: 0.5002, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.50s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.7982, Accuracy: 0.5003, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.46s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.7815, Accuracy: 0.5003, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.53s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8186, Accuracy: 0.5002, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.69s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8387, Accuracy: 0.5002, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.63s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8854, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.55s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8888, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.53s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8787, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.67s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8729, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.58s


  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8198, Accuracy: 0.5002, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.59s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8267, Accuracy: 0.5002, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.54s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8401, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.53s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8625, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.53s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8851, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.59s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9697, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.66s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9663, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.50s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0457, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.49s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0639, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.54s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0623, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.56s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9864, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.64s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0332, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.53s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0017, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.46s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9578, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.53s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9545, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.49s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9727, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.58s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9590, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.54s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9251, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.51s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8794, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.54s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8697, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.54s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8215, Accuracy: 0.5002, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.53s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8100, Accuracy: 0.5002, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.62s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8491, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.60s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8613, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.57s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8585, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.54s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8316, Accuracy: 0.5002, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.56s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8795, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.57s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8948, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.56s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9247, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.61s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9074, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.99s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8985, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.66s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9210, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.58s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9178, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.58s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8950, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.57s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8493, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.61s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9087, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.65s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8717, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.55s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9090, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.62s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8836, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.57s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8725, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.73s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8792, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.59s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9075, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.67s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9213, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.59s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8765, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.64s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8655, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.63s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8933, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.64s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8771, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.67s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9721, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.59s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9796, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.69s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9363, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.70s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9145, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.63s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9367, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.52s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9442, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.73s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9539, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.77s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9910, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.66s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9354, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.73s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9140, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.59s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9352, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.60s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9565, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.70s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9612, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.72s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0102, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.73s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0091, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.66s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9956, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.79s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9951, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.62s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9584, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.75s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9626, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.70s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9058, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.64s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9361, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.65s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9540, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.60s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9585, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.56s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9556, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.71s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9555, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.66s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9658, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.62s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9878, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.66s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0200, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.67s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0173, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.68s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9871, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.73s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0142, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.69s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0259, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.66s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0253, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.63s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0536, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.91s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0192, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.63s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0116, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.79s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0379, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.69s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0279, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.72s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0229, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.66s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0176, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.68s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9953, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.58s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9875, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.75s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9921, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.66s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9981, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.67s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9603, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.71s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9137, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.65s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9200, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.66s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9222, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.75s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9548, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.89s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9455, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.72s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9687, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.72s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9504, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.70s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9254, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.78s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9574, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.73s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9390, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.77s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9206, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.86s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8977, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.70s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9200, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.82s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9202, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.70s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9201, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.67s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9064, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.68s


  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9405, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.71s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9380, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.70s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9382, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.74s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9491, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.71s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9757, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.79s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9888, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.73s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9708, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.69s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9598, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.76s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9484, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.71s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9464, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.73s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9809, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.71s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9739, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.74s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9629, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.69s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9797, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.74s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9730, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.72s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9196, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.74s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9087, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.79s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8869, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.70s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8719, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.80s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8931, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.76s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9392, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.75s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9431, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.80s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9345, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.71s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9345, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.87s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9384, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.74s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9338, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.79s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9280, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.77s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9344, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.78s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9280, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.70s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9770, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.86s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9851, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.73s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9603, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.71s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9457, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.79s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9701, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.88s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9695, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.85s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9489, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.82s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9508, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.91s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9627, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.87s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0012, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.92s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9986, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.82s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9803, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.83s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9598, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.86s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9777, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.84s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9495, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.76s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9316, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.81s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9117, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.81s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9079, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.84s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8725, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.81s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8668, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.88s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8706, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.82s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8864, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.79s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9061, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.80s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8617, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.82s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8408, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.90s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8274, Accuracy: 0.5002, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.80s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8256, Accuracy: 0.5002, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.80s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8314, Accuracy: 0.5002, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.75s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8372, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.78s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8447, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.79s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8158, Accuracy: 0.5002, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.05s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8139, Accuracy: 0.5002, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.95s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8197, Accuracy: 0.5002, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.88s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8215, Accuracy: 0.5002, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.88s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8611, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.03s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8591, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.91s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8529, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.90s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8191, Accuracy: 0.5002, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.01s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8547, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.92s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8812, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.84s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8401, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.74s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.7844, Accuracy: 0.5003, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.87s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.7993, Accuracy: 0.5002, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.88s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.7640, Accuracy: 0.5003, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.93s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.7685, Accuracy: 0.5003, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.81s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8037, Accuracy: 0.5002, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.89s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8115, Accuracy: 0.5002, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.90s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8078, Accuracy: 0.5002, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.88s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8444, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.00s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8263, Accuracy: 0.5002, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.83s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8316, Accuracy: 0.5002, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.01s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8319, Accuracy: 0.5002, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.83s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8596, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.00s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8563, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.87s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8599, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.87s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8670, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.89s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8599, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.06s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8633, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.07s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8829, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.10s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8780, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.94s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8886, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.90s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9173, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.89s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9081, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.88s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9307, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.90s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9322, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.04s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9445, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.86s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9536, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.96s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9760, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.88s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9720, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.94s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9929, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.05s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0085, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.95s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9758, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.03s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9842, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.01s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0324, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.11s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0184, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.98s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9991, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.95s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0160, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.88s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0331, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.86s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0413, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.00s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0120, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.94s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9792, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.98s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0286, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.89s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0469, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.91s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0308, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.93s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0507, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.22s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0435, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.92s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0431, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.95s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0531, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.89s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0313, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.89s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0196, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.88s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0209, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.09s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0255, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.05s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0099, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.91s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0083, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.00s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0161, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.99s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9992, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.09s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9827, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.05s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9596, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.08s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9723, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.10s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9623, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.95s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9571, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.00s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9671, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.01s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9588, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.02s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9601, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.04s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9633, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.91s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9584, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.06s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9549, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.90s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9627, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.97s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9436, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.10s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9400, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.10s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9448, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.95s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9302, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.01s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9299, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.94s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8902, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.95s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8587, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.99s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8507, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.11s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8379, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.09s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8395, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.99s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8678, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.03s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8862, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.38s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8863, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.26s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8879, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.99s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8475, Accuracy: 0.5001, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.98s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8940, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.98s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9330, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.03s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9360, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.00s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9482, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.03s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9574, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.10s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9741, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.02s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9632, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.06s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9507, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.03s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9340, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.07s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9445, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.02s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9167, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.02s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9136, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.92s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9136, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.04s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9352, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.93s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9410, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.04s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9556, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.98s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9575, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.18s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9710, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.10s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9738, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.05s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9931, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.13s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9796, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.10s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9793, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.03s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9866, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.07s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9969, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.97s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0190, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.01s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0346, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.13s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9956, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.00s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0057, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.13s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9984, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.25s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0230, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.06s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0111, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.25s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0181, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.22s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0262, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.96s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0176, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.06s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0248, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.20s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0305, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.12s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0171, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.23s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9879, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.07s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9835, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.17s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9661, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.13s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9303, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.22s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9286, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.06s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9245, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.02s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9187, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 0.99s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9255, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.14s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9696, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.08s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9707, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.23s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9692, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.04s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9633, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.16s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9588, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.10s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9629, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.08s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9428, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.21s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9414, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.06s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9273, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.10s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9063, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.02s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9273, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.16s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8884, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.41s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.8993, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.21s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9116, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.24s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9045, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.05s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9055, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.14s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9512, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.32s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9564, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.26s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9646, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.08s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9687, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.03s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9577, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.04s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9684, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.10s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9889, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.09s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0050, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.20s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9967, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.23s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9913, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.29s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9858, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.32s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9750, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.25s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9683, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.06s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9764, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.12s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9613, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.08s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9894, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.14s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9744, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.11s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9558, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.22s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9581, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.13s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9593, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.19s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9672, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.18s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9549, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.19s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9465, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.10s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9556, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.05s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9556, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.15s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9742, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.12s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9742, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.23s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9810, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.16s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9915, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.16s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9859, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.27s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9701, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.36s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9673, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.35s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9632, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.15s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9946, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.51s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9905, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.15s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0163, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.37s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0096, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.24s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0351, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.27s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0166, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.21s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0319, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.19s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0343, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.21s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0416, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.06s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0657, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.13s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0501, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.33s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0487, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.16s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0521, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.28s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0566, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.40s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0449, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.33s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0753, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.30s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0787, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.38s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0770, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.15s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0921, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.16s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0827, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.35s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0811, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.32s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0922, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.36s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0689, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.25s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0609, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.18s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0456, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.32s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0541, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.42s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0662, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.15s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0622, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.15s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0718, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.18s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0913, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.34s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0823, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.24s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0768, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.24s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0802, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.17s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0824, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.20s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1080, Accuracy: 0.4994, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.34s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1199, Accuracy: 0.4994, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.44s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1111, Accuracy: 0.4994, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.22s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1169, Accuracy: 0.4994, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.29s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1042, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.26s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1197, Accuracy: 0.4994, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.39s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1352, Accuracy: 0.4994, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.39s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1547, Accuracy: 0.4993, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.27s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1483, Accuracy: 0.4993, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.24s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1734, Accuracy: 0.4993, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.39s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1695, Accuracy: 0.4993, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.35s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1557, Accuracy: 0.4993, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.22s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1527, Accuracy: 0.4993, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.45s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1618, Accuracy: 0.4993, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.21s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1445, Accuracy: 0.4994, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.41s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1283, Accuracy: 0.4994, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.41s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1400, Accuracy: 0.4994, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.27s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1429, Accuracy: 0.4994, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.34s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1450, Accuracy: 0.4993, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.17s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1325, Accuracy: 0.4994, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.36s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1420, Accuracy: 0.4994, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.29s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1462, Accuracy: 0.4993, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.25s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1458, Accuracy: 0.4993, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.36s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1467, Accuracy: 0.4993, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.31s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1452, Accuracy: 0.4993, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.29s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1650, Accuracy: 0.4993, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.27s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2068, Accuracy: 0.4992, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.53s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2006, Accuracy: 0.4992, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.35s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1989, Accuracy: 0.4992, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.22s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2184, Accuracy: 0.4992, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.24s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2405, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.28s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2318, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.25s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2269, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.27s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2215, Accuracy: 0.4992, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.28s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2271, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.44s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2289, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.36s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2237, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.50s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2455, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.30s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2565, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.32s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2385, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.26s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2563, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.32s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2581, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.26s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2587, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.28s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2523, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.29s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2620, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.33s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2638, Accuracy: 0.4990, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.25s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2565, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.29s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2733, Accuracy: 0.4990, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.24s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2727, Accuracy: 0.4990, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.36s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2605, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.26s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2487, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.38s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2514, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.58s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2690, Accuracy: 0.4990, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.40s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2573, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.30s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2567, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.19s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2481, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.33s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2387, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.39s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2212, Accuracy: 0.4992, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.37s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2140, Accuracy: 0.4992, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.36s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2103, Accuracy: 0.4992, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.33s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1963, Accuracy: 0.4992, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.46s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2061, Accuracy: 0.4992, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.44s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2055, Accuracy: 0.4992, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.34s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2007, Accuracy: 0.4992, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.30s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2024, Accuracy: 0.4992, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.50s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2055, Accuracy: 0.4992, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.36s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2048, Accuracy: 0.4992, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.32s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2359, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.30s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2432, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.49s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2381, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.47s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2388, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.45s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2461, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.38s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2553, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.21s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2813, Accuracy: 0.4990, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.54s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2674, Accuracy: 0.4990, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.54s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2812, Accuracy: 0.4990, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.38s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2863, Accuracy: 0.4990, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.41s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2846, Accuracy: 0.4990, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.95s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2876, Accuracy: 0.4990, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.49s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2915, Accuracy: 0.4990, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.39s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2866, Accuracy: 0.4990, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.40s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2665, Accuracy: 0.4990, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.48s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2477, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.45s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2505, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.48s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2511, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.38s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2472, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.73s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2425, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.47s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2407, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.38s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2227, Accuracy: 0.4992, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.35s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2267, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.42s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2479, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.43s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2528, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.42s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2513, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.33s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2573, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.26s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2581, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.59s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2607, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.53s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2590, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.77s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2667, Accuracy: 0.4990, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.47s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2524, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.50s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2452, Accuracy: 0.4991, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.39s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2096, Accuracy: 0.4992, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.41s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2092, Accuracy: 0.4992, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.32s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1992, Accuracy: 0.4992, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.34s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2075, Accuracy: 0.4992, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.44s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.2047, Accuracy: 0.4992, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.47s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1936, Accuracy: 0.4992, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.53s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1797, Accuracy: 0.4993, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.53s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1918, Accuracy: 0.4992, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.62s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1810, Accuracy: 0.4993, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.66s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1841, Accuracy: 0.4993, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.42s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1909, Accuracy: 0.4992, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.24s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1854, Accuracy: 0.4993, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.42s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1598, Accuracy: 0.4993, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.34s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1755, Accuracy: 0.4993, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.60s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1552, Accuracy: 0.4993, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.57s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1632, Accuracy: 0.4993, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.56s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1607, Accuracy: 0.4993, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.52s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1489, Accuracy: 0.4993, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.66s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1381, Accuracy: 0.4994, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.65s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1287, Accuracy: 0.4994, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.29s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1326, Accuracy: 0.4994, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.38s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1228, Accuracy: 0.4994, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.50s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1113, Accuracy: 0.4994, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.51s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1009, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.56s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0915, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.46s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0830, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.59s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0681, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.66s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0740, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.35s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0778, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.42s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0737, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.54s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0753, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.61s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0628, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.78s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0432, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.57s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0471, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.50s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0550, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.63s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0597, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.49s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0614, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.36s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0711, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.55s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0830, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.70s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0825, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.50s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0994, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.48s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1042, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.50s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0921, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.57s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0977, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.76s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0812, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.31s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0798, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.48s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1096, Accuracy: 0.4994, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.50s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1264, Accuracy: 0.4994, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.53s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1362, Accuracy: 0.4994, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.65s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1336, Accuracy: 0.4994, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.59s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1399, Accuracy: 0.4994, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.82s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1306, Accuracy: 0.4994, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.77s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1482, Accuracy: 0.4993, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.44s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1518, Accuracy: 0.4993, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.56s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1396, Accuracy: 0.4994, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.58s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1415, Accuracy: 0.4994, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.64s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1443, Accuracy: 0.4994, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.75s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1576, Accuracy: 0.4993, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.75s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1505, Accuracy: 0.4993, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.79s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1334, Accuracy: 0.4994, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.48s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1309, Accuracy: 0.4994, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.46s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1188, Accuracy: 0.4994, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.64s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1031, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.50s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0911, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.64s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0935, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.63s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1032, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.85s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0855, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.62s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0738, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.82s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0639, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.40s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0558, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.52s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0506, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.83s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0368, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.71s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0442, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.59s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0353, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.66s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0389, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.75s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0370, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.52s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0502, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.44s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0453, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.58s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0510, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.82s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0526, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.58s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0554, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.57s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0648, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.57s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0779, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.82s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0852, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.59s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0736, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.51s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0894, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.53s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0909, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.52s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1047, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.57s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0991, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.85s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.1110, Accuracy: 0.4994, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.66s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0993, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.81s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0867, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.57s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0836, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.60s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0881, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.80s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0848, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.65s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0939, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.59s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0879, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.75s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0981, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.87s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0782, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.78s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0687, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.49s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0582, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.51s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0692, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.62s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0765, Accuracy: 0.4995, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.71s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0660, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.74s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0669, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.74s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0520, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.70s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0481, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.74s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0599, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.49s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0522, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.70s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0640, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.85s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0583, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.91s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0489, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.73s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0460, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.65s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0273, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.73s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0291, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.53s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0234, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.60s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0289, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.70s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0314, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.69s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0248, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.63s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0275, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.65s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0174, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.82s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0262, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.77s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0209, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.68s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0236, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.72s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0263, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.71s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0297, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.73s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0340, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.67s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0256, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.76s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0227, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.77s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0189, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.85s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0181, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.46s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0099, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.56s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0212, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.86s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0147, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.75s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0129, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.78s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0029, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.78s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0098, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 2.01s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0294, Accuracy: 0.4996, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.55s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0228, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.75s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0245, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.61s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0128, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.70s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0065, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.72s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9833, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 2.07s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 196.0011, Accuracy: 0.4997, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.96s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9826, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.68s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9825, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.73s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9806, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.88s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9558, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.70s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9635, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.79s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9707, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.68s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9574, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.98s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9495, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.82s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9541, Accuracy: 0.4998, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.63s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9398, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.60s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9319, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.69s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9302, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.65s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9155, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.78s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9146, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.71s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9087, Accuracy: 0.5000, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.88s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))


Epoch [1/10], Step [100/1421], TrainLoss: nan; ValidLoss: 195.9172, Accuracy: 0.4999, Precision: 0.0000, Recall: 0.0000, curr_epoch_best_accuracy: 0.5053; Time: 1.82s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


KeyboardInterrupt: 