# Training ResNeXt

This Notebook will go through the process of training the ResNeXt model with an architecture that is optimized for the Cifar Dataset (as specified in [1] https://arxiv.org/pdf/1611.05431.pdf).

In [1]:
import os
import sys
import json
import shutil
import torch
import torch.nn.functional as F
import torchvision.datasets as dset
import torchvision.transforms as transforms
import time

In [2]:
sys.path.append("..") # Note, this line is needed only since this notebook is being run from inside 'bharat' directory
                      # If you run this notebook in the same folder as the the 'models-py-code' directory, remove this line
from pymodels.resnext import CifarResNeXt # This is where we actually refer to the model we wish to train

## Set all required parameters
Remember to change the `session_id` for each new model you wish to train or if you change of the other parameters

In [3]:
args={}

# Input Data Parameters
args['data_path'] = '/datasets/ee285s-public/'
args['dataset'] = 'cifar100' # The other option is 'cifar10'

# Optimization options
args['epochs'] = 300
args['start_epoch'] = 0 # Change only if necessary. Used for resuming sessions
args['batch_size'] = 96
args['learning_rate'] = 0.1
args['momentum'] = 0.9 # Momentum
args['decay'] = 0.0005 # Weight decay (L2 penalty)
args['test_bs'] = 64 # Test Batch Size
args['schedule'] = [75, 150] # Decrease learning rate at these epochs
args['gamma'] = 0.1 # LR is multiplied by gamma on schedule.

# Checkpoints and Session Parameters
args['save_dir'] = './' # Folder to save checkpoint file and best model file
args['load'] = 'resnext-0-checkpoint.pth' # Path to load Checkpoint file 
args['session_id'] = '0' # Remember to change the session id for each new model you train

# Architecture
args['depth'] = 29 # Model depth
args['cardinality'] = 8 # Model cardinality (group)
args['base_width'] = 64 # Number of channels in each group
args['widen_factor'] = 4 # Widen factor. 4 -> 64, 8 -> 128, ...

# Acceleration
args['ngpu'] = 1 # 0 = CPU
args['prefetch'] = 2 # Pre-fetching threads

# i/o
args['log_dir'] = './log/' # Log folder

### Initialize Logging

In [4]:
if not os.path.isdir(args['log_dir']):
        os.makedirs(args['log_dir'])
if args['load'] == '':
    log = open(os.path.join(args['log_dir'], 'resnext-{}-log.txt'.format(args['session_id'])), 'w')
else:
    log = open(os.path.join(args['log_dir'], 'resnext-{}-log.txt'.format(args['session_id'])), 'a')

### Setup the Input Data

In [5]:
mean = [x / 255 for x in [125.3, 123.0, 113.9]]
std = [x / 255 for x in [63.0, 62.1, 66.7]]

train_transform = transforms.Compose(
    [transforms.RandomHorizontalFlip(),
     transforms.RandomCrop(32, padding=4),
     transforms.ToTensor(),
     transforms.Normalize(mean, std)])

test_transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(mean, std)])

if args['dataset'] == 'cifar10':
    train_data = dset.CIFAR10(args['data_path'], train=True, transform=train_transform, download=False)
    test_data = dset.CIFAR10(args['data_path'], train=False, transform=test_transform, download=False)
    nlabels = 10
else:
    train_data = dset.CIFAR100(args['data_path'], train=True, transform=train_transform, download=False)
    test_data = dset.CIFAR100(args['data_path'], train=False, transform=test_transform, download=False)
    nlabels = 100

train_loader = torch.utils.data.DataLoader(train_data, batch_size=args['batch_size'], shuffle=True,
                                                                       num_workers=args['prefetch'], pin_memory=True)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=args['test_bs'], shuffle=False,
                                                                     num_workers=args['prefetch'], pin_memory=True)

### Setup the Model, Criterion, and Optimizer

In [6]:
resnext_model = CifarResNeXt(args['cardinality'], args['depth'], nlabels, args['base_width'], args['widen_factor'])
print(resnext_model)
if args['ngpu'] > 1:
    resnext_model = torch.nn.DataParallel(resnext_model, device_ids=list(range(args['ngpu'])))

if args['ngpu'] > 0:
    resnext_model.cuda()

optimizer = torch.optim.SGD(resnext_model.parameters(), args['learning_rate'], momentum=args['momentum'],
                            weight_decay=args['decay'], nesterov=True)

CifarResNeXt(
  (conv_1_3x3): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn_1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
  (stage_1): Sequential(
    (stage_1_bottleneck_0): ResNeXtBottleneck(
      (conv_reduce): Conv2d(64, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn_reduce): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True)
      (conv_conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=8, bias=False)
      (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True)
      (conv_expand): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn_expand): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
      (shortcut): Sequential(
        (shortcut_conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (shortcut_bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
      )
    )
    (stage_1_bottleneck_1): ResNeXtBottleneck(
      (

### Setup the Train and Test Functions

In [7]:
def train(epoch):
    resnext_model.train()
    loss_avg = 0.0
    t0 = time.time()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = torch.autograd.Variable(data.cuda()), torch.autograd.Variable(target.cuda())

        # forward
        output = resnext_model(data)

        # backward
        optimizer.zero_grad()
        t0 = time.time()
        loss = F.cross_entropy(output, target)
        t1 = time.time()
        loss.backward()
        optimizer.step()

        # exponential moving average
        loss_avg = loss_avg * 0.2 + float(loss) * 0.8

        print("===> Epoch[{}]({}/{}): Loss: {:.4f} || Timer: {:.4f} sec.".format(epoch, batch_idx, len(train_loader), loss.data[0], (t1 - t0)))

    epoch_state['train_loss'] = loss_avg
    t1 = time.time()
    print("===> Epoch {} Complete: Avg. Loss: {:.4f} | Time: {:.4f}".format(epoch, loss_avg, (t1-t0)))

# test function (forward only)
def test():
    resnext_model.eval()
    loss_avg = 0.0
    correct = 0
    for batch_idx, (data, target) in enumerate(test_loader):
        data, target = torch.autograd.Variable(data.cuda()), torch.autograd.Variable(target.cuda())

        # forward
        output = resnext_model(data)
        loss = F.cross_entropy(output, target)

        # accuracy
        pred = output.data.max(1)[1]
        correct += float(pred.eq(target.data).sum())

        # test loss average
        loss_avg += float(loss)

    epoch_state['test_loss'] = loss_avg / len(test_loader)
    epoch_state['test_accuracy'] = correct / len(test_loader.dataset)

### Define a function to save a checkpoint

In [8]:
def save_checkpoint(state, is_best, filename='resnext-{}-checkpoint.pth'.format(args['session_id'])):
    filepath = os.path.join(args['save_dir'], filename)
    torch.save(state, filepath)
    if is_best:
        bestfilepath = os.path.join(args['save_dir'], 'model-best-resnext-{}.pth'.format(args['session_id']))
        shutil.copyfile(filepath, bestfilepath)

### Load and Resume from checkpoint (if set)

In [9]:
if not args['load'] == '':
    if os.path.isfile(args['load']):
        print("=> Loading Checkpoint '{}'".format(args['load']))
        checkpoint = torch.load(args['load'])
        args['start_epoch'] = checkpoint['epoch'] + 1
        resnext_model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("=> Loaded checkpoint '{}' (epoch {})"
              .format(args['load'], checkpoint['epoch']))
    else:
        print("=> No checkpoint found at '{}'".format(args['load']))

=> Loading Checkpoint 'resnext-0-checkpoint.pth'
=> Loaded checkpoint 'resnext-0-checkpoint.pth' (epoch 38)


### Main Loop

In [None]:
best_accuracy = 0.0
epoch_state = {'args' : args}
for epoch in range(args['start_epoch'], args['epochs']):
    if epoch in args['schedule']:
        epoch_state['learning_rate'] *= args['gamma']
        for param_group in optimizer.param_groups:
            param_group['lr'] = epoch_state['learning_rate']

    epoch_state['epoch'] = epoch
    train(epoch)
    test()
    if epoch_state['test_accuracy'] > best_accuracy:
        best_accuracy = epoch_state['test_accuracy']
        isbest = True
    
    save_checkpoint({
        'epoch' : epoch,
        'state_dict' : resnext_model.state_dict(),
        'optimizer' : optimizer.state_dict()
    }, isbest)
    
    log.write('%s\n' % json.dumps(epoch_state))
    log.flush()
    #print(epoch_state)
    print("[Epoch {}] Best accuracy: {:.4f}".format(epoch, best_accuracy))

log.close()

===> Epoch[39](0/521): Loss: 0.9516 || Timer: 0.0015 sec.
===> Epoch[39](1/521): Loss: 1.2045 || Timer: 0.0004 sec.
===> Epoch[39](2/521): Loss: 0.7910 || Timer: 0.0002 sec.
===> Epoch[39](3/521): Loss: 1.0417 || Timer: 0.0003 sec.
===> Epoch[39](4/521): Loss: 1.0221 || Timer: 0.0005 sec.
===> Epoch[39](5/521): Loss: 0.9995 || Timer: 0.0003 sec.
===> Epoch[39](6/521): Loss: 0.9543 || Timer: 0.0002 sec.
===> Epoch[39](7/521): Loss: 1.0648 || Timer: 0.0005 sec.
===> Epoch[39](8/521): Loss: 1.1851 || Timer: 0.0003 sec.
===> Epoch[39](9/521): Loss: 0.9705 || Timer: 0.0002 sec.
===> Epoch[39](10/521): Loss: 1.2722 || Timer: 0.0003 sec.
===> Epoch[39](11/521): Loss: 0.8576 || Timer: 0.0004 sec.
===> Epoch[39](12/521): Loss: 0.8488 || Timer: 0.0002 sec.
===> Epoch[39](13/521): Loss: 0.8266 || Timer: 0.0002 sec.
===> Epoch[39](14/521): Loss: 1.1461 || Timer: 0.0002 sec.
===> Epoch[39](15/521): Loss: 0.8802 || Timer: 0.0003 sec.
===> Epoch[39](16/521): Loss: 0.8214 || Timer: 0.0003 sec.
===> Ep

===> Epoch[39](139/521): Loss: 0.9178 || Timer: 0.0002 sec.
===> Epoch[39](140/521): Loss: 0.9447 || Timer: 0.0002 sec.
===> Epoch[39](141/521): Loss: 0.8855 || Timer: 0.0002 sec.
===> Epoch[39](142/521): Loss: 0.9994 || Timer: 0.0002 sec.
===> Epoch[39](143/521): Loss: 1.3385 || Timer: 0.0002 sec.
===> Epoch[39](144/521): Loss: 1.1263 || Timer: 0.0002 sec.
===> Epoch[39](145/521): Loss: 1.0224 || Timer: 0.0002 sec.
===> Epoch[39](146/521): Loss: 0.8236 || Timer: 0.0002 sec.
===> Epoch[39](147/521): Loss: 1.1117 || Timer: 0.0002 sec.
===> Epoch[39](148/521): Loss: 1.0006 || Timer: 0.0002 sec.
===> Epoch[39](149/521): Loss: 1.0253 || Timer: 0.0002 sec.
===> Epoch[39](150/521): Loss: 0.8203 || Timer: 0.0002 sec.
===> Epoch[39](151/521): Loss: 0.9378 || Timer: 0.0001 sec.
===> Epoch[39](152/521): Loss: 1.3077 || Timer: 0.0002 sec.
===> Epoch[39](153/521): Loss: 1.2198 || Timer: 0.0002 sec.
===> Epoch[39](154/521): Loss: 0.9018 || Timer: 0.0002 sec.
===> Epoch[39](155/521): Loss: 1.1103 ||

===> Epoch[39](492/521): Loss: 1.3189 || Timer: 0.0002 sec.
===> Epoch[39](493/521): Loss: 1.2800 || Timer: 0.0002 sec.
===> Epoch[39](494/521): Loss: 1.3123 || Timer: 0.0002 sec.
===> Epoch[39](495/521): Loss: 0.9740 || Timer: 0.0002 sec.
===> Epoch[39](496/521): Loss: 0.9982 || Timer: 0.0002 sec.
===> Epoch[39](497/521): Loss: 1.1815 || Timer: 0.0002 sec.
===> Epoch[39](498/521): Loss: 0.7197 || Timer: 0.0002 sec.
===> Epoch[39](499/521): Loss: 1.1837 || Timer: 0.0002 sec.
===> Epoch[39](500/521): Loss: 1.1266 || Timer: 0.0002 sec.
===> Epoch[39](501/521): Loss: 1.0225 || Timer: 0.0001 sec.
===> Epoch[39](502/521): Loss: 1.2788 || Timer: 0.0001 sec.
===> Epoch[39](503/521): Loss: 1.0202 || Timer: 0.0001 sec.
===> Epoch[39](504/521): Loss: 1.2430 || Timer: 0.0001 sec.
===> Epoch[39](505/521): Loss: 0.9925 || Timer: 0.0003 sec.
===> Epoch[39](506/521): Loss: 1.1391 || Timer: 0.0002 sec.
===> Epoch[39](507/521): Loss: 0.8131 || Timer: 0.0002 sec.
===> Epoch[39](508/521): Loss: 1.2451 ||

===> Epoch[40](108/521): Loss: 0.9781 || Timer: 0.0001 sec.
===> Epoch[40](109/521): Loss: 1.1028 || Timer: 0.0002 sec.
===> Epoch[40](110/521): Loss: 0.8473 || Timer: 0.0002 sec.
===> Epoch[40](111/521): Loss: 1.0487 || Timer: 0.0002 sec.
===> Epoch[40](112/521): Loss: 1.1214 || Timer: 0.0002 sec.
===> Epoch[40](113/521): Loss: 1.2631 || Timer: 0.0002 sec.
===> Epoch[40](114/521): Loss: 0.6777 || Timer: 0.0002 sec.
===> Epoch[40](115/521): Loss: 0.9806 || Timer: 0.0002 sec.
===> Epoch[40](116/521): Loss: 0.8694 || Timer: 0.0002 sec.
===> Epoch[40](117/521): Loss: 1.2120 || Timer: 0.0002 sec.
===> Epoch[40](118/521): Loss: 1.1012 || Timer: 0.0002 sec.
===> Epoch[40](119/521): Loss: 0.8917 || Timer: 0.0002 sec.
===> Epoch[40](120/521): Loss: 0.9228 || Timer: 0.0002 sec.
===> Epoch[40](121/521): Loss: 0.7710 || Timer: 0.0002 sec.
===> Epoch[40](122/521): Loss: 0.8642 || Timer: 0.0002 sec.
===> Epoch[40](123/521): Loss: 1.0127 || Timer: 0.0002 sec.
===> Epoch[40](124/521): Loss: 0.8187 ||

===> Epoch[40](245/521): Loss: 1.0421 || Timer: 0.0002 sec.
===> Epoch[40](246/521): Loss: 0.9235 || Timer: 0.0002 sec.
===> Epoch[40](247/521): Loss: 0.8060 || Timer: 0.0002 sec.
===> Epoch[40](248/521): Loss: 0.6631 || Timer: 0.0002 sec.
===> Epoch[40](249/521): Loss: 1.0758 || Timer: 0.0002 sec.
===> Epoch[40](250/521): Loss: 0.7525 || Timer: 0.0002 sec.
===> Epoch[40](251/521): Loss: 1.2306 || Timer: 0.0002 sec.
===> Epoch[40](252/521): Loss: 0.9444 || Timer: 0.0002 sec.
===> Epoch[40](253/521): Loss: 1.0682 || Timer: 0.0002 sec.
===> Epoch[40](254/521): Loss: 1.1285 || Timer: 0.0002 sec.
===> Epoch[40](255/521): Loss: 1.1052 || Timer: 0.0002 sec.
===> Epoch[40](256/521): Loss: 1.0515 || Timer: 0.0002 sec.
===> Epoch[40](257/521): Loss: 1.0172 || Timer: 0.0002 sec.
===> Epoch[40](258/521): Loss: 1.1004 || Timer: 0.0002 sec.
===> Epoch[40](259/521): Loss: 1.1600 || Timer: 0.0002 sec.
===> Epoch[40](260/521): Loss: 1.0207 || Timer: 0.0002 sec.
===> Epoch[40](261/521): Loss: 1.0508 ||

===> Epoch[40](382/521): Loss: 0.9690 || Timer: 0.0002 sec.
===> Epoch[40](383/521): Loss: 0.9117 || Timer: 0.0002 sec.
===> Epoch[40](384/521): Loss: 1.2082 || Timer: 0.0002 sec.
===> Epoch[40](385/521): Loss: 1.0179 || Timer: 0.0002 sec.
===> Epoch[40](386/521): Loss: 0.9587 || Timer: 0.0002 sec.
===> Epoch[40](387/521): Loss: 1.1547 || Timer: 0.0002 sec.
===> Epoch[40](388/521): Loss: 0.8771 || Timer: 0.0002 sec.
===> Epoch[40](389/521): Loss: 0.9326 || Timer: 0.0002 sec.
===> Epoch[40](390/521): Loss: 1.2420 || Timer: 0.0002 sec.
===> Epoch[40](391/521): Loss: 1.1272 || Timer: 0.0002 sec.
===> Epoch[40](392/521): Loss: 1.2539 || Timer: 0.0002 sec.
===> Epoch[40](393/521): Loss: 1.0832 || Timer: 0.0002 sec.
===> Epoch[40](394/521): Loss: 1.0673 || Timer: 0.0002 sec.
===> Epoch[40](395/521): Loss: 0.9003 || Timer: 0.0002 sec.
===> Epoch[40](396/521): Loss: 1.0608 || Timer: 0.0002 sec.
===> Epoch[40](397/521): Loss: 1.2072 || Timer: 0.0002 sec.
===> Epoch[40](398/521): Loss: 0.7466 ||

===> Epoch[40](519/521): Loss: 1.0587 || Timer: 0.0002 sec.
===> Epoch[40](520/521): Loss: 1.0208 || Timer: 0.0002 sec.
===> Epoch 40 Complete: Avg. Loss: 1.0309 | Time: 0.3893
[Epoch 40] Best accuracy: 0.5843
===> Epoch[41](0/521): Loss: 0.8606 || Timer: 0.0001 sec.
===> Epoch[41](1/521): Loss: 0.7935 || Timer: 0.0002 sec.
===> Epoch[41](2/521): Loss: 0.9800 || Timer: 0.0002 sec.
===> Epoch[41](3/521): Loss: 1.0186 || Timer: 0.0002 sec.
===> Epoch[41](4/521): Loss: 1.0445 || Timer: 0.0002 sec.
===> Epoch[41](5/521): Loss: 0.9514 || Timer: 0.0002 sec.
===> Epoch[41](6/521): Loss: 0.7574 || Timer: 0.0002 sec.
===> Epoch[41](7/521): Loss: 0.8943 || Timer: 0.0002 sec.
===> Epoch[41](8/521): Loss: 0.8105 || Timer: 0.0002 sec.
===> Epoch[41](9/521): Loss: 0.9375 || Timer: 0.0002 sec.
===> Epoch[41](10/521): Loss: 0.9154 || Timer: 0.0002 sec.
===> Epoch[41](11/521): Loss: 0.9304 || Timer: 0.0002 sec.
===> Epoch[41](12/521): Loss: 0.6816 || Timer: 0.0002 sec.
===> Epoch[41](13/521): Loss: 0.9

===> Epoch[41](135/521): Loss: 0.8795 || Timer: 0.0002 sec.
===> Epoch[41](136/521): Loss: 1.0448 || Timer: 0.0002 sec.
===> Epoch[41](137/521): Loss: 1.0249 || Timer: 0.0002 sec.
===> Epoch[41](138/521): Loss: 1.0569 || Timer: 0.0002 sec.
===> Epoch[41](139/521): Loss: 0.9677 || Timer: 0.0002 sec.
===> Epoch[41](140/521): Loss: 1.0314 || Timer: 0.0002 sec.
===> Epoch[41](141/521): Loss: 1.0498 || Timer: 0.0002 sec.
===> Epoch[41](142/521): Loss: 0.9321 || Timer: 0.0002 sec.
===> Epoch[41](143/521): Loss: 1.2507 || Timer: 0.0002 sec.
===> Epoch[41](144/521): Loss: 1.1486 || Timer: 0.0002 sec.
===> Epoch[41](145/521): Loss: 1.0595 || Timer: 0.0003 sec.
===> Epoch[41](146/521): Loss: 0.9212 || Timer: 0.0002 sec.
===> Epoch[41](147/521): Loss: 1.1217 || Timer: 0.0002 sec.
===> Epoch[41](148/521): Loss: 1.0837 || Timer: 0.0002 sec.
===> Epoch[41](149/521): Loss: 0.9712 || Timer: 0.0002 sec.
===> Epoch[41](150/521): Loss: 0.9160 || Timer: 0.0002 sec.
===> Epoch[41](151/521): Loss: 1.1479 ||

===> Epoch[41](272/521): Loss: 0.9028 || Timer: 0.0003 sec.
===> Epoch[41](273/521): Loss: 0.9988 || Timer: 0.0002 sec.
===> Epoch[41](274/521): Loss: 1.0055 || Timer: 0.0002 sec.
===> Epoch[41](275/521): Loss: 0.8834 || Timer: 0.0002 sec.
===> Epoch[41](276/521): Loss: 1.0045 || Timer: 0.0002 sec.
===> Epoch[41](277/521): Loss: 0.7692 || Timer: 0.0002 sec.
===> Epoch[41](278/521): Loss: 1.1345 || Timer: 0.0002 sec.
===> Epoch[41](279/521): Loss: 1.4327 || Timer: 0.0002 sec.
===> Epoch[41](280/521): Loss: 1.0455 || Timer: 0.0002 sec.
===> Epoch[41](281/521): Loss: 1.2316 || Timer: 0.0002 sec.
===> Epoch[41](282/521): Loss: 1.2156 || Timer: 0.0002 sec.
===> Epoch[41](283/521): Loss: 0.9610 || Timer: 0.0002 sec.
===> Epoch[41](284/521): Loss: 1.5396 || Timer: 0.0002 sec.
===> Epoch[41](285/521): Loss: 1.2316 || Timer: 0.0002 sec.
===> Epoch[41](286/521): Loss: 1.0836 || Timer: 0.0002 sec.
===> Epoch[41](287/521): Loss: 0.9805 || Timer: 0.0002 sec.
===> Epoch[41](288/521): Loss: 0.9993 ||

===> Epoch[41](409/521): Loss: 1.1239 || Timer: 0.0002 sec.
===> Epoch[41](410/521): Loss: 0.7514 || Timer: 0.0002 sec.
===> Epoch[41](411/521): Loss: 0.9977 || Timer: 0.0002 sec.
===> Epoch[41](412/521): Loss: 1.0116 || Timer: 0.0002 sec.
===> Epoch[41](413/521): Loss: 1.1820 || Timer: 0.0002 sec.
===> Epoch[41](414/521): Loss: 1.0542 || Timer: 0.0002 sec.
===> Epoch[41](415/521): Loss: 1.1308 || Timer: 0.0002 sec.
===> Epoch[41](416/521): Loss: 1.0379 || Timer: 0.0002 sec.
===> Epoch[41](417/521): Loss: 1.1204 || Timer: 0.0002 sec.
===> Epoch[41](418/521): Loss: 0.7286 || Timer: 0.0002 sec.
===> Epoch[41](419/521): Loss: 0.9653 || Timer: 0.0002 sec.
===> Epoch[41](420/521): Loss: 1.0646 || Timer: 0.0002 sec.
===> Epoch[41](421/521): Loss: 0.8958 || Timer: 0.0002 sec.
===> Epoch[41](422/521): Loss: 1.1941 || Timer: 0.0002 sec.
===> Epoch[41](423/521): Loss: 1.3518 || Timer: 0.0002 sec.
===> Epoch[41](424/521): Loss: 1.2267 || Timer: 0.0002 sec.
===> Epoch[41](425/521): Loss: 1.0463 ||

===> Epoch[42](24/521): Loss: 0.8037 || Timer: 0.0002 sec.
===> Epoch[42](25/521): Loss: 0.7971 || Timer: 0.0002 sec.
===> Epoch[42](26/521): Loss: 1.0718 || Timer: 0.0002 sec.
===> Epoch[42](27/521): Loss: 0.8446 || Timer: 0.0002 sec.
===> Epoch[42](28/521): Loss: 0.8870 || Timer: 0.0002 sec.
===> Epoch[42](29/521): Loss: 0.9013 || Timer: 0.0002 sec.
===> Epoch[42](30/521): Loss: 0.8207 || Timer: 0.0002 sec.
===> Epoch[42](31/521): Loss: 0.7544 || Timer: 0.0003 sec.
===> Epoch[42](32/521): Loss: 0.8864 || Timer: 0.0002 sec.
===> Epoch[42](33/521): Loss: 1.0195 || Timer: 0.0002 sec.
===> Epoch[42](34/521): Loss: 0.9761 || Timer: 0.0002 sec.
===> Epoch[42](35/521): Loss: 0.8444 || Timer: 0.0002 sec.
===> Epoch[42](36/521): Loss: 0.8066 || Timer: 0.0002 sec.
===> Epoch[42](37/521): Loss: 0.7978 || Timer: 0.0002 sec.
===> Epoch[42](38/521): Loss: 0.9213 || Timer: 0.0002 sec.
===> Epoch[42](39/521): Loss: 0.8699 || Timer: 0.0002 sec.
===> Epoch[42](40/521): Loss: 0.9475 || Timer: 0.0002 se

===> Epoch[42](162/521): Loss: 1.1931 || Timer: 0.0002 sec.
===> Epoch[42](163/521): Loss: 0.9751 || Timer: 0.0002 sec.
===> Epoch[42](164/521): Loss: 1.1395 || Timer: 0.0002 sec.
===> Epoch[42](165/521): Loss: 0.7252 || Timer: 0.0002 sec.
===> Epoch[42](166/521): Loss: 1.5096 || Timer: 0.0002 sec.
===> Epoch[42](167/521): Loss: 1.0196 || Timer: 0.0002 sec.
===> Epoch[42](168/521): Loss: 1.0802 || Timer: 0.0002 sec.
===> Epoch[42](169/521): Loss: 0.9530 || Timer: 0.0002 sec.
===> Epoch[42](170/521): Loss: 0.8962 || Timer: 0.0002 sec.
===> Epoch[42](171/521): Loss: 1.1341 || Timer: 0.0002 sec.
===> Epoch[42](172/521): Loss: 1.0327 || Timer: 0.0002 sec.
===> Epoch[42](173/521): Loss: 0.9842 || Timer: 0.0002 sec.
===> Epoch[42](174/521): Loss: 1.0273 || Timer: 0.0002 sec.
===> Epoch[42](175/521): Loss: 1.1006 || Timer: 0.0002 sec.
===> Epoch[42](176/521): Loss: 0.9364 || Timer: 0.0002 sec.
===> Epoch[42](177/521): Loss: 0.9523 || Timer: 0.0002 sec.
===> Epoch[42](178/521): Loss: 0.7702 ||

===> Epoch[42](299/521): Loss: 0.9096 || Timer: 0.0002 sec.
===> Epoch[42](300/521): Loss: 1.3714 || Timer: 0.0002 sec.
===> Epoch[42](301/521): Loss: 1.0783 || Timer: 0.0002 sec.
===> Epoch[42](302/521): Loss: 1.1413 || Timer: 0.0002 sec.
===> Epoch[42](303/521): Loss: 1.2036 || Timer: 0.0002 sec.
===> Epoch[42](304/521): Loss: 0.9611 || Timer: 0.0002 sec.
===> Epoch[42](305/521): Loss: 1.0342 || Timer: 0.0002 sec.
===> Epoch[42](306/521): Loss: 1.1842 || Timer: 0.0002 sec.
===> Epoch[42](307/521): Loss: 1.0165 || Timer: 0.0002 sec.
===> Epoch[42](308/521): Loss: 0.9400 || Timer: 0.0002 sec.
===> Epoch[42](309/521): Loss: 0.8714 || Timer: 0.0002 sec.
===> Epoch[42](310/521): Loss: 1.0864 || Timer: 0.0002 sec.
===> Epoch[42](311/521): Loss: 1.0466 || Timer: 0.0002 sec.
===> Epoch[42](312/521): Loss: 1.1548 || Timer: 0.0002 sec.
===> Epoch[42](313/521): Loss: 1.0666 || Timer: 0.0002 sec.
===> Epoch[42](314/521): Loss: 1.2261 || Timer: 0.0002 sec.
===> Epoch[42](315/521): Loss: 1.2841 ||

===> Epoch[42](436/521): Loss: 0.9971 || Timer: 0.0002 sec.
===> Epoch[42](437/521): Loss: 1.1844 || Timer: 0.0002 sec.
===> Epoch[42](438/521): Loss: 0.9669 || Timer: 0.0002 sec.
===> Epoch[42](439/521): Loss: 1.2745 || Timer: 0.0002 sec.
===> Epoch[42](440/521): Loss: 1.2902 || Timer: 0.0002 sec.
===> Epoch[42](441/521): Loss: 1.0046 || Timer: 0.0002 sec.
===> Epoch[42](442/521): Loss: 1.4636 || Timer: 0.0002 sec.
===> Epoch[42](443/521): Loss: 1.0286 || Timer: 0.0002 sec.
===> Epoch[42](444/521): Loss: 1.0332 || Timer: 0.0002 sec.
===> Epoch[42](445/521): Loss: 0.9134 || Timer: 0.0002 sec.
===> Epoch[42](446/521): Loss: 1.0868 || Timer: 0.0002 sec.
===> Epoch[42](447/521): Loss: 1.1568 || Timer: 0.0002 sec.
===> Epoch[42](448/521): Loss: 1.0095 || Timer: 0.0002 sec.
===> Epoch[42](449/521): Loss: 1.0114 || Timer: 0.0002 sec.
===> Epoch[42](450/521): Loss: 1.2450 || Timer: 0.0002 sec.
===> Epoch[42](451/521): Loss: 1.3848 || Timer: 0.0002 sec.
===> Epoch[42](452/521): Loss: 1.0357 ||

===> Epoch[43](52/521): Loss: 0.9541 || Timer: 0.0002 sec.
===> Epoch[43](53/521): Loss: 0.7780 || Timer: 0.0002 sec.
===> Epoch[43](54/521): Loss: 0.8219 || Timer: 0.0002 sec.
===> Epoch[43](55/521): Loss: 0.7752 || Timer: 0.0002 sec.
===> Epoch[43](56/521): Loss: 1.1630 || Timer: 0.0002 sec.
===> Epoch[43](57/521): Loss: 0.8441 || Timer: 0.0002 sec.
===> Epoch[43](58/521): Loss: 0.8809 || Timer: 0.0002 sec.
===> Epoch[43](59/521): Loss: 0.7443 || Timer: 0.0003 sec.
===> Epoch[43](60/521): Loss: 0.8335 || Timer: 0.0002 sec.
===> Epoch[43](61/521): Loss: 0.7930 || Timer: 0.0002 sec.
===> Epoch[43](62/521): Loss: 1.0913 || Timer: 0.0002 sec.
===> Epoch[43](63/521): Loss: 1.0753 || Timer: 0.0002 sec.
===> Epoch[43](64/521): Loss: 0.9238 || Timer: 0.0002 sec.
===> Epoch[43](65/521): Loss: 0.8210 || Timer: 0.0002 sec.
===> Epoch[43](66/521): Loss: 0.8497 || Timer: 0.0002 sec.
===> Epoch[43](67/521): Loss: 1.2043 || Timer: 0.0002 sec.
===> Epoch[43](68/521): Loss: 1.0697 || Timer: 0.0002 se

===> Epoch[43](190/521): Loss: 1.1549 || Timer: 0.0002 sec.
===> Epoch[43](191/521): Loss: 1.0308 || Timer: 0.0002 sec.
===> Epoch[43](192/521): Loss: 0.9822 || Timer: 0.0002 sec.
===> Epoch[43](193/521): Loss: 0.8543 || Timer: 0.0002 sec.
===> Epoch[43](194/521): Loss: 0.8657 || Timer: 0.0002 sec.
===> Epoch[43](195/521): Loss: 1.0240 || Timer: 0.0002 sec.
===> Epoch[43](196/521): Loss: 0.9556 || Timer: 0.0002 sec.
===> Epoch[43](197/521): Loss: 1.1467 || Timer: 0.0002 sec.
===> Epoch[43](198/521): Loss: 0.6495 || Timer: 0.0002 sec.
===> Epoch[43](199/521): Loss: 1.0675 || Timer: 0.0002 sec.
===> Epoch[43](200/521): Loss: 1.1421 || Timer: 0.0002 sec.
===> Epoch[43](201/521): Loss: 0.9216 || Timer: 0.0002 sec.
===> Epoch[43](202/521): Loss: 0.9133 || Timer: 0.0002 sec.
===> Epoch[43](203/521): Loss: 1.0516 || Timer: 0.0002 sec.
===> Epoch[43](204/521): Loss: 0.9739 || Timer: 0.0003 sec.
===> Epoch[43](205/521): Loss: 1.1193 || Timer: 0.0002 sec.
===> Epoch[43](206/521): Loss: 1.0586 ||

===> Epoch[43](327/521): Loss: 1.2065 || Timer: 0.0002 sec.
===> Epoch[43](328/521): Loss: 0.9853 || Timer: 0.0002 sec.
===> Epoch[43](329/521): Loss: 0.8459 || Timer: 0.0002 sec.
===> Epoch[43](330/521): Loss: 0.8926 || Timer: 0.0002 sec.
===> Epoch[43](331/521): Loss: 1.1535 || Timer: 0.0002 sec.
===> Epoch[43](332/521): Loss: 0.9143 || Timer: 0.0002 sec.
===> Epoch[43](333/521): Loss: 1.1885 || Timer: 0.0002 sec.
===> Epoch[43](334/521): Loss: 1.0347 || Timer: 0.0002 sec.
===> Epoch[43](335/521): Loss: 1.2226 || Timer: 0.0002 sec.
===> Epoch[43](336/521): Loss: 1.0060 || Timer: 0.0002 sec.
===> Epoch[43](337/521): Loss: 0.8417 || Timer: 0.0002 sec.
===> Epoch[43](338/521): Loss: 0.9354 || Timer: 0.0002 sec.
===> Epoch[43](339/521): Loss: 1.1249 || Timer: 0.0002 sec.
===> Epoch[43](340/521): Loss: 1.1756 || Timer: 0.0002 sec.
===> Epoch[43](341/521): Loss: 1.1922 || Timer: 0.0002 sec.
===> Epoch[43](342/521): Loss: 1.0868 || Timer: 0.0002 sec.
===> Epoch[43](343/521): Loss: 1.0640 ||

===> Epoch[43](464/521): Loss: 0.6379 || Timer: 0.0002 sec.
===> Epoch[43](465/521): Loss: 0.8352 || Timer: 0.0002 sec.
===> Epoch[43](466/521): Loss: 1.0996 || Timer: 0.0002 sec.
===> Epoch[43](467/521): Loss: 1.1944 || Timer: 0.0002 sec.
===> Epoch[43](468/521): Loss: 1.0561 || Timer: 0.0002 sec.
===> Epoch[43](469/521): Loss: 1.1673 || Timer: 0.0002 sec.
===> Epoch[43](470/521): Loss: 1.1609 || Timer: 0.0002 sec.
===> Epoch[43](471/521): Loss: 1.1415 || Timer: 0.0002 sec.
===> Epoch[43](472/521): Loss: 1.0443 || Timer: 0.0002 sec.
===> Epoch[43](473/521): Loss: 0.9539 || Timer: 0.0002 sec.
===> Epoch[43](474/521): Loss: 1.0601 || Timer: 0.0002 sec.
===> Epoch[43](475/521): Loss: 0.9629 || Timer: 0.0002 sec.
===> Epoch[43](476/521): Loss: 1.0997 || Timer: 0.0002 sec.
===> Epoch[43](477/521): Loss: 0.9173 || Timer: 0.0002 sec.
===> Epoch[43](478/521): Loss: 0.8922 || Timer: 0.0002 sec.
===> Epoch[43](479/521): Loss: 1.0921 || Timer: 0.0002 sec.
===> Epoch[43](480/521): Loss: 1.0994 ||

===> Epoch[44](80/521): Loss: 0.9906 || Timer: 0.0002 sec.
===> Epoch[44](81/521): Loss: 1.0036 || Timer: 0.0002 sec.
===> Epoch[44](82/521): Loss: 1.0296 || Timer: 0.0002 sec.
===> Epoch[44](83/521): Loss: 0.7059 || Timer: 0.0002 sec.
===> Epoch[44](84/521): Loss: 0.9032 || Timer: 0.0002 sec.
===> Epoch[44](85/521): Loss: 1.1898 || Timer: 0.0002 sec.
===> Epoch[44](86/521): Loss: 0.8531 || Timer: 0.0002 sec.
===> Epoch[44](87/521): Loss: 1.0147 || Timer: 0.0002 sec.
===> Epoch[44](88/521): Loss: 0.8802 || Timer: 0.0002 sec.
===> Epoch[44](89/521): Loss: 0.8610 || Timer: 0.0002 sec.
===> Epoch[44](90/521): Loss: 0.8294 || Timer: 0.0002 sec.
===> Epoch[44](91/521): Loss: 1.1926 || Timer: 0.0002 sec.
===> Epoch[44](92/521): Loss: 0.9382 || Timer: 0.0002 sec.
===> Epoch[44](93/521): Loss: 0.9163 || Timer: 0.0002 sec.
===> Epoch[44](94/521): Loss: 0.8249 || Timer: 0.0002 sec.
===> Epoch[44](95/521): Loss: 0.8827 || Timer: 0.0002 sec.
===> Epoch[44](96/521): Loss: 0.9543 || Timer: 0.0002 se

===> Epoch[44](217/521): Loss: 1.2312 || Timer: 0.0002 sec.
===> Epoch[44](218/521): Loss: 0.8107 || Timer: 0.0002 sec.
===> Epoch[44](219/521): Loss: 0.9686 || Timer: 0.0002 sec.
===> Epoch[44](220/521): Loss: 1.1982 || Timer: 0.0002 sec.
===> Epoch[44](221/521): Loss: 1.0181 || Timer: 0.0002 sec.
===> Epoch[44](222/521): Loss: 1.0376 || Timer: 0.0002 sec.
===> Epoch[44](223/521): Loss: 0.9219 || Timer: 0.0002 sec.
===> Epoch[44](224/521): Loss: 0.9408 || Timer: 0.0002 sec.
===> Epoch[44](225/521): Loss: 0.9549 || Timer: 0.0003 sec.
===> Epoch[44](226/521): Loss: 1.0274 || Timer: 0.0002 sec.
===> Epoch[44](227/521): Loss: 1.0393 || Timer: 0.0002 sec.
===> Epoch[44](228/521): Loss: 1.0079 || Timer: 0.0003 sec.
===> Epoch[44](229/521): Loss: 0.9077 || Timer: 0.0002 sec.
===> Epoch[44](230/521): Loss: 0.9515 || Timer: 0.0002 sec.
===> Epoch[44](231/521): Loss: 0.9256 || Timer: 0.0002 sec.
===> Epoch[44](232/521): Loss: 1.0274 || Timer: 0.0002 sec.
===> Epoch[44](233/521): Loss: 0.9716 ||

===> Epoch[44](354/521): Loss: 1.0359 || Timer: 0.0002 sec.
===> Epoch[44](355/521): Loss: 0.8821 || Timer: 0.0002 sec.
===> Epoch[44](356/521): Loss: 1.1067 || Timer: 0.0002 sec.
===> Epoch[44](357/521): Loss: 1.2359 || Timer: 0.0002 sec.
===> Epoch[44](358/521): Loss: 1.1917 || Timer: 0.0002 sec.
===> Epoch[44](359/521): Loss: 1.1243 || Timer: 0.0002 sec.
===> Epoch[44](360/521): Loss: 1.1829 || Timer: 0.0002 sec.
===> Epoch[44](361/521): Loss: 1.2388 || Timer: 0.0002 sec.
===> Epoch[44](362/521): Loss: 1.1222 || Timer: 0.0002 sec.
===> Epoch[44](363/521): Loss: 0.9319 || Timer: 0.0002 sec.
===> Epoch[44](364/521): Loss: 1.0333 || Timer: 0.0002 sec.
===> Epoch[44](365/521): Loss: 1.1483 || Timer: 0.0002 sec.
===> Epoch[44](366/521): Loss: 0.8230 || Timer: 0.0002 sec.
===> Epoch[44](367/521): Loss: 1.1192 || Timer: 0.0002 sec.
===> Epoch[44](368/521): Loss: 1.2489 || Timer: 0.0002 sec.
===> Epoch[44](369/521): Loss: 0.9501 || Timer: 0.0002 sec.
===> Epoch[44](370/521): Loss: 1.1446 ||

===> Epoch[44](491/521): Loss: 1.0133 || Timer: 0.0002 sec.
===> Epoch[44](492/521): Loss: 1.0006 || Timer: 0.0002 sec.
===> Epoch[44](493/521): Loss: 1.2708 || Timer: 0.0002 sec.
===> Epoch[44](494/521): Loss: 1.2713 || Timer: 0.0002 sec.
===> Epoch[44](495/521): Loss: 1.0877 || Timer: 0.0002 sec.
===> Epoch[44](496/521): Loss: 1.1253 || Timer: 0.0002 sec.
===> Epoch[44](497/521): Loss: 1.2362 || Timer: 0.0002 sec.
===> Epoch[44](498/521): Loss: 1.0377 || Timer: 0.0002 sec.
===> Epoch[44](499/521): Loss: 1.1352 || Timer: 0.0002 sec.
===> Epoch[44](500/521): Loss: 0.9516 || Timer: 0.0003 sec.
===> Epoch[44](501/521): Loss: 1.2712 || Timer: 0.0002 sec.
===> Epoch[44](502/521): Loss: 1.1397 || Timer: 0.0002 sec.
===> Epoch[44](503/521): Loss: 1.1402 || Timer: 0.0002 sec.
===> Epoch[44](504/521): Loss: 1.2675 || Timer: 0.0002 sec.
===> Epoch[44](505/521): Loss: 1.0766 || Timer: 0.0002 sec.
===> Epoch[44](506/521): Loss: 1.3592 || Timer: 0.0003 sec.
===> Epoch[44](507/521): Loss: 0.9256 ||

===> Epoch[45](107/521): Loss: 1.0960 || Timer: 0.0002 sec.
===> Epoch[45](108/521): Loss: 0.8958 || Timer: 0.0002 sec.
===> Epoch[45](109/521): Loss: 0.8920 || Timer: 0.0002 sec.
===> Epoch[45](110/521): Loss: 1.0276 || Timer: 0.0002 sec.
===> Epoch[45](111/521): Loss: 0.9880 || Timer: 0.0002 sec.
===> Epoch[45](112/521): Loss: 1.0806 || Timer: 0.0002 sec.
===> Epoch[45](113/521): Loss: 0.9348 || Timer: 0.0002 sec.
===> Epoch[45](114/521): Loss: 1.1248 || Timer: 0.0002 sec.
===> Epoch[45](115/521): Loss: 1.2111 || Timer: 0.0002 sec.
===> Epoch[45](116/521): Loss: 0.8769 || Timer: 0.0002 sec.
===> Epoch[45](117/521): Loss: 1.1289 || Timer: 0.0002 sec.
===> Epoch[45](118/521): Loss: 0.8446 || Timer: 0.0002 sec.
===> Epoch[45](119/521): Loss: 1.2979 || Timer: 0.0002 sec.
===> Epoch[45](120/521): Loss: 0.9332 || Timer: 0.0002 sec.
===> Epoch[45](121/521): Loss: 1.2534 || Timer: 0.0002 sec.
===> Epoch[45](122/521): Loss: 1.1034 || Timer: 0.0002 sec.
===> Epoch[45](123/521): Loss: 1.0694 ||

===> Epoch[45](244/521): Loss: 1.0631 || Timer: 0.0002 sec.
===> Epoch[45](245/521): Loss: 1.0911 || Timer: 0.0002 sec.
===> Epoch[45](246/521): Loss: 0.9139 || Timer: 0.0002 sec.
===> Epoch[45](247/521): Loss: 1.3214 || Timer: 0.0002 sec.
===> Epoch[45](248/521): Loss: 1.1542 || Timer: 0.0002 sec.
===> Epoch[45](249/521): Loss: 1.1565 || Timer: 0.0002 sec.
===> Epoch[45](250/521): Loss: 0.8399 || Timer: 0.0002 sec.
===> Epoch[45](251/521): Loss: 1.0030 || Timer: 0.0002 sec.
===> Epoch[45](252/521): Loss: 1.0509 || Timer: 0.0002 sec.
===> Epoch[45](253/521): Loss: 1.1498 || Timer: 0.0002 sec.
===> Epoch[45](254/521): Loss: 1.0601 || Timer: 0.0002 sec.
===> Epoch[45](255/521): Loss: 1.4271 || Timer: 0.0002 sec.
===> Epoch[45](256/521): Loss: 1.1205 || Timer: 0.0002 sec.
===> Epoch[45](257/521): Loss: 1.1041 || Timer: 0.0002 sec.
===> Epoch[45](258/521): Loss: 0.8295 || Timer: 0.0002 sec.
===> Epoch[45](259/521): Loss: 1.0588 || Timer: 0.0002 sec.
===> Epoch[45](260/521): Loss: 0.9847 ||

===> Epoch[45](381/521): Loss: 1.0055 || Timer: 0.0002 sec.
===> Epoch[45](382/521): Loss: 1.1068 || Timer: 0.0002 sec.
===> Epoch[45](383/521): Loss: 1.0636 || Timer: 0.0002 sec.
===> Epoch[45](384/521): Loss: 1.0867 || Timer: 0.0002 sec.
===> Epoch[45](385/521): Loss: 1.1967 || Timer: 0.0002 sec.
===> Epoch[45](386/521): Loss: 1.0963 || Timer: 0.0002 sec.
===> Epoch[45](387/521): Loss: 1.0286 || Timer: 0.0002 sec.
===> Epoch[45](388/521): Loss: 1.0986 || Timer: 0.0002 sec.
===> Epoch[45](389/521): Loss: 0.9269 || Timer: 0.0002 sec.
===> Epoch[45](390/521): Loss: 0.8974 || Timer: 0.0002 sec.
===> Epoch[45](391/521): Loss: 1.0693 || Timer: 0.0002 sec.
===> Epoch[45](392/521): Loss: 1.1246 || Timer: 0.0002 sec.
===> Epoch[45](393/521): Loss: 1.0438 || Timer: 0.0002 sec.
===> Epoch[45](394/521): Loss: 1.0408 || Timer: 0.0002 sec.
===> Epoch[45](395/521): Loss: 0.8933 || Timer: 0.0002 sec.
===> Epoch[45](396/521): Loss: 0.9831 || Timer: 0.0002 sec.
===> Epoch[45](397/521): Loss: 0.8841 ||

===> Epoch[45](518/521): Loss: 1.3175 || Timer: 0.0002 sec.
===> Epoch[45](519/521): Loss: 0.8083 || Timer: 0.0002 sec.
===> Epoch[45](520/521): Loss: 0.8410 || Timer: 0.0002 sec.
===> Epoch 45 Complete: Avg. Loss: 0.8526 | Time: 0.3858
[Epoch 45] Best accuracy: 0.5843
===> Epoch[46](0/521): Loss: 0.7977 || Timer: 0.0001 sec.
===> Epoch[46](1/521): Loss: 1.0067 || Timer: 0.0002 sec.
===> Epoch[46](2/521): Loss: 0.9638 || Timer: 0.0002 sec.
===> Epoch[46](3/521): Loss: 0.7895 || Timer: 0.0002 sec.
===> Epoch[46](4/521): Loss: 0.9788 || Timer: 0.0002 sec.
===> Epoch[46](5/521): Loss: 0.9992 || Timer: 0.0002 sec.
===> Epoch[46](6/521): Loss: 1.1299 || Timer: 0.0002 sec.
===> Epoch[46](7/521): Loss: 1.0655 || Timer: 0.0002 sec.
===> Epoch[46](8/521): Loss: 1.3188 || Timer: 0.0002 sec.
===> Epoch[46](9/521): Loss: 0.8924 || Timer: 0.0002 sec.
===> Epoch[46](10/521): Loss: 0.8824 || Timer: 0.0002 sec.
===> Epoch[46](11/521): Loss: 0.9079 || Timer: 0.0002 sec.
===> Epoch[46](12/521): Loss: 1.

===> Epoch[46](134/521): Loss: 0.8504 || Timer: 0.0003 sec.
===> Epoch[46](135/521): Loss: 0.9675 || Timer: 0.0002 sec.
===> Epoch[46](136/521): Loss: 1.0581 || Timer: 0.0002 sec.
===> Epoch[46](137/521): Loss: 0.8449 || Timer: 0.0002 sec.
===> Epoch[46](138/521): Loss: 0.9840 || Timer: 0.0002 sec.
===> Epoch[46](139/521): Loss: 1.1388 || Timer: 0.0002 sec.
===> Epoch[46](140/521): Loss: 1.0597 || Timer: 0.0002 sec.
===> Epoch[46](141/521): Loss: 0.9473 || Timer: 0.0002 sec.
===> Epoch[46](142/521): Loss: 0.9944 || Timer: 0.0002 sec.
===> Epoch[46](143/521): Loss: 0.9685 || Timer: 0.0002 sec.
===> Epoch[46](144/521): Loss: 1.0217 || Timer: 0.0002 sec.
===> Epoch[46](145/521): Loss: 1.1662 || Timer: 0.0002 sec.
===> Epoch[46](146/521): Loss: 1.1583 || Timer: 0.0002 sec.
===> Epoch[46](147/521): Loss: 1.0059 || Timer: 0.0002 sec.
===> Epoch[46](148/521): Loss: 0.9184 || Timer: 0.0002 sec.
===> Epoch[46](149/521): Loss: 0.8981 || Timer: 0.0002 sec.
===> Epoch[46](150/521): Loss: 0.8115 ||

===> Epoch[46](271/521): Loss: 0.9975 || Timer: 0.0002 sec.
===> Epoch[46](272/521): Loss: 0.9333 || Timer: 0.0002 sec.
===> Epoch[46](273/521): Loss: 1.1036 || Timer: 0.0002 sec.
===> Epoch[46](274/521): Loss: 0.9785 || Timer: 0.0002 sec.
===> Epoch[46](275/521): Loss: 0.9500 || Timer: 0.0002 sec.
===> Epoch[46](276/521): Loss: 0.9951 || Timer: 0.0002 sec.
===> Epoch[46](277/521): Loss: 0.8208 || Timer: 0.0002 sec.
===> Epoch[46](278/521): Loss: 0.8840 || Timer: 0.0002 sec.
===> Epoch[46](279/521): Loss: 1.0810 || Timer: 0.0002 sec.
===> Epoch[46](280/521): Loss: 1.1874 || Timer: 0.0002 sec.
===> Epoch[46](281/521): Loss: 0.9520 || Timer: 0.0002 sec.
===> Epoch[46](282/521): Loss: 0.7911 || Timer: 0.0002 sec.
===> Epoch[46](283/521): Loss: 1.1475 || Timer: 0.0002 sec.
===> Epoch[46](284/521): Loss: 0.9202 || Timer: 0.0002 sec.
===> Epoch[46](285/521): Loss: 0.8594 || Timer: 0.0002 sec.
===> Epoch[46](286/521): Loss: 0.8320 || Timer: 0.0002 sec.
===> Epoch[46](287/521): Loss: 1.0453 ||

===> Epoch[46](408/521): Loss: 1.1868 || Timer: 0.0002 sec.
===> Epoch[46](409/521): Loss: 0.9037 || Timer: 0.0002 sec.
===> Epoch[46](410/521): Loss: 1.1243 || Timer: 0.0002 sec.
===> Epoch[46](411/521): Loss: 1.1404 || Timer: 0.0002 sec.
===> Epoch[46](412/521): Loss: 0.9633 || Timer: 0.0002 sec.
===> Epoch[46](413/521): Loss: 0.9674 || Timer: 0.0002 sec.
===> Epoch[46](414/521): Loss: 1.2128 || Timer: 0.0002 sec.
===> Epoch[46](415/521): Loss: 1.0966 || Timer: 0.0002 sec.
===> Epoch[46](416/521): Loss: 0.9036 || Timer: 0.0002 sec.
===> Epoch[46](417/521): Loss: 1.0119 || Timer: 0.0002 sec.
===> Epoch[46](418/521): Loss: 1.2851 || Timer: 0.0002 sec.
===> Epoch[46](419/521): Loss: 0.9990 || Timer: 0.0002 sec.
===> Epoch[46](420/521): Loss: 1.1270 || Timer: 0.0002 sec.
===> Epoch[46](421/521): Loss: 0.9460 || Timer: 0.0002 sec.
===> Epoch[46](422/521): Loss: 1.0853 || Timer: 0.0002 sec.
===> Epoch[46](423/521): Loss: 1.1485 || Timer: 0.0002 sec.
===> Epoch[46](424/521): Loss: 1.3660 ||

===> Epoch[47](23/521): Loss: 1.0318 || Timer: 0.0002 sec.
===> Epoch[47](24/521): Loss: 0.8498 || Timer: 0.0002 sec.
===> Epoch[47](25/521): Loss: 0.9423 || Timer: 0.0002 sec.
===> Epoch[47](26/521): Loss: 0.6754 || Timer: 0.0002 sec.
===> Epoch[47](27/521): Loss: 1.1112 || Timer: 0.0002 sec.
===> Epoch[47](28/521): Loss: 0.7766 || Timer: 0.0002 sec.
===> Epoch[47](29/521): Loss: 0.6244 || Timer: 0.0002 sec.
===> Epoch[47](30/521): Loss: 0.8716 || Timer: 0.0002 sec.
===> Epoch[47](31/521): Loss: 0.9094 || Timer: 0.0002 sec.
===> Epoch[47](32/521): Loss: 0.6170 || Timer: 0.0002 sec.
===> Epoch[47](33/521): Loss: 0.7583 || Timer: 0.0002 sec.
===> Epoch[47](34/521): Loss: 0.8452 || Timer: 0.0002 sec.
===> Epoch[47](35/521): Loss: 0.7749 || Timer: 0.0003 sec.
===> Epoch[47](36/521): Loss: 0.8099 || Timer: 0.0002 sec.
===> Epoch[47](37/521): Loss: 1.0284 || Timer: 0.0002 sec.
===> Epoch[47](38/521): Loss: 0.9539 || Timer: 0.0002 sec.
===> Epoch[47](39/521): Loss: 0.9131 || Timer: 0.0002 se

===> Epoch[47](161/521): Loss: 1.0887 || Timer: 0.0003 sec.
===> Epoch[47](162/521): Loss: 1.0635 || Timer: 0.0002 sec.
===> Epoch[47](163/521): Loss: 0.9890 || Timer: 0.0002 sec.
===> Epoch[47](164/521): Loss: 0.9269 || Timer: 0.0003 sec.
===> Epoch[47](165/521): Loss: 1.1413 || Timer: 0.0002 sec.
===> Epoch[47](166/521): Loss: 0.9683 || Timer: 0.0002 sec.
===> Epoch[47](167/521): Loss: 0.9123 || Timer: 0.0002 sec.
===> Epoch[47](168/521): Loss: 1.0357 || Timer: 0.0002 sec.
===> Epoch[47](169/521): Loss: 1.0977 || Timer: 0.0002 sec.
===> Epoch[47](170/521): Loss: 0.9979 || Timer: 0.0002 sec.
===> Epoch[47](171/521): Loss: 0.8364 || Timer: 0.0002 sec.
===> Epoch[47](172/521): Loss: 0.8868 || Timer: 0.0002 sec.
===> Epoch[47](173/521): Loss: 0.8387 || Timer: 0.0002 sec.
===> Epoch[47](174/521): Loss: 1.1234 || Timer: 0.0002 sec.
===> Epoch[47](175/521): Loss: 1.3171 || Timer: 0.0002 sec.
===> Epoch[47](176/521): Loss: 1.1286 || Timer: 0.0002 sec.
===> Epoch[47](177/521): Loss: 1.1275 ||

===> Epoch[47](298/521): Loss: 0.8374 || Timer: 0.0002 sec.
===> Epoch[47](299/521): Loss: 1.4176 || Timer: 0.0002 sec.
===> Epoch[47](300/521): Loss: 0.7626 || Timer: 0.0002 sec.
===> Epoch[47](301/521): Loss: 0.8250 || Timer: 0.0002 sec.
===> Epoch[47](302/521): Loss: 0.9012 || Timer: 0.0002 sec.
===> Epoch[47](303/521): Loss: 0.8643 || Timer: 0.0002 sec.
===> Epoch[47](304/521): Loss: 0.9473 || Timer: 0.0002 sec.
===> Epoch[47](305/521): Loss: 1.0484 || Timer: 0.0002 sec.
===> Epoch[47](306/521): Loss: 1.0758 || Timer: 0.0002 sec.
===> Epoch[47](307/521): Loss: 1.0684 || Timer: 0.0002 sec.
===> Epoch[47](308/521): Loss: 0.9562 || Timer: 0.0002 sec.
===> Epoch[47](309/521): Loss: 1.2350 || Timer: 0.0002 sec.
===> Epoch[47](310/521): Loss: 1.3486 || Timer: 0.0002 sec.
===> Epoch[47](311/521): Loss: 1.0248 || Timer: 0.0002 sec.
===> Epoch[47](312/521): Loss: 1.1843 || Timer: 0.0002 sec.
===> Epoch[47](313/521): Loss: 1.0868 || Timer: 0.0002 sec.
===> Epoch[47](314/521): Loss: 1.0710 ||

===> Epoch[47](435/521): Loss: 1.0462 || Timer: 0.0002 sec.
===> Epoch[47](436/521): Loss: 1.1187 || Timer: 0.0002 sec.
===> Epoch[47](437/521): Loss: 0.9927 || Timer: 0.0002 sec.
===> Epoch[47](438/521): Loss: 1.0955 || Timer: 0.0002 sec.
===> Epoch[47](439/521): Loss: 1.2649 || Timer: 0.0002 sec.
===> Epoch[47](440/521): Loss: 1.0964 || Timer: 0.0002 sec.
===> Epoch[47](441/521): Loss: 1.0705 || Timer: 0.0002 sec.
===> Epoch[47](442/521): Loss: 1.1617 || Timer: 0.0002 sec.
===> Epoch[47](443/521): Loss: 1.1146 || Timer: 0.0002 sec.
===> Epoch[47](444/521): Loss: 0.8813 || Timer: 0.0002 sec.
===> Epoch[47](445/521): Loss: 1.2229 || Timer: 0.0002 sec.
===> Epoch[47](446/521): Loss: 1.1218 || Timer: 0.0002 sec.
===> Epoch[47](447/521): Loss: 1.1693 || Timer: 0.0002 sec.
===> Epoch[47](448/521): Loss: 0.8401 || Timer: 0.0002 sec.
===> Epoch[47](449/521): Loss: 0.9674 || Timer: 0.0002 sec.
===> Epoch[47](450/521): Loss: 1.1310 || Timer: 0.0002 sec.
===> Epoch[47](451/521): Loss: 1.0643 ||

===> Epoch[48](51/521): Loss: 0.8150 || Timer: 0.0002 sec.
===> Epoch[48](52/521): Loss: 0.6374 || Timer: 0.0002 sec.
===> Epoch[48](53/521): Loss: 1.2087 || Timer: 0.0002 sec.
===> Epoch[48](54/521): Loss: 0.7995 || Timer: 0.0002 sec.
===> Epoch[48](55/521): Loss: 0.8563 || Timer: 0.0002 sec.
===> Epoch[48](56/521): Loss: 0.8746 || Timer: 0.0003 sec.
===> Epoch[48](57/521): Loss: 0.8467 || Timer: 0.0003 sec.
===> Epoch[48](58/521): Loss: 0.9528 || Timer: 0.0002 sec.
===> Epoch[48](59/521): Loss: 0.9391 || Timer: 0.0002 sec.
===> Epoch[48](60/521): Loss: 0.8218 || Timer: 0.0002 sec.
===> Epoch[48](61/521): Loss: 1.0422 || Timer: 0.0002 sec.
===> Epoch[48](62/521): Loss: 1.0042 || Timer: 0.0002 sec.
===> Epoch[48](63/521): Loss: 0.8910 || Timer: 0.0002 sec.
===> Epoch[48](64/521): Loss: 0.9166 || Timer: 0.0002 sec.
===> Epoch[48](65/521): Loss: 0.8731 || Timer: 0.0002 sec.
===> Epoch[48](66/521): Loss: 0.9012 || Timer: 0.0002 sec.
===> Epoch[48](67/521): Loss: 0.9260 || Timer: 0.0002 se

===> Epoch[48](189/521): Loss: 1.2605 || Timer: 0.0002 sec.
===> Epoch[48](190/521): Loss: 1.0774 || Timer: 0.0002 sec.
===> Epoch[48](191/521): Loss: 1.0967 || Timer: 0.0002 sec.
===> Epoch[48](192/521): Loss: 1.0177 || Timer: 0.0002 sec.
===> Epoch[48](193/521): Loss: 0.9702 || Timer: 0.0003 sec.
===> Epoch[48](194/521): Loss: 1.3191 || Timer: 0.0002 sec.
===> Epoch[48](195/521): Loss: 0.9044 || Timer: 0.0002 sec.
===> Epoch[48](196/521): Loss: 0.9597 || Timer: 0.0002 sec.
===> Epoch[48](197/521): Loss: 0.8548 || Timer: 0.0002 sec.
===> Epoch[48](198/521): Loss: 1.2703 || Timer: 0.0002 sec.
===> Epoch[48](199/521): Loss: 1.1106 || Timer: 0.0002 sec.
===> Epoch[48](200/521): Loss: 1.0933 || Timer: 0.0002 sec.
===> Epoch[48](201/521): Loss: 1.2437 || Timer: 0.0002 sec.
===> Epoch[48](202/521): Loss: 0.8442 || Timer: 0.0002 sec.
===> Epoch[48](203/521): Loss: 0.8449 || Timer: 0.0002 sec.
===> Epoch[48](204/521): Loss: 0.9806 || Timer: 0.0002 sec.
===> Epoch[48](205/521): Loss: 1.0650 ||

===> Epoch[48](326/521): Loss: 1.0409 || Timer: 0.0002 sec.
===> Epoch[48](327/521): Loss: 0.8718 || Timer: 0.0002 sec.
===> Epoch[48](328/521): Loss: 0.8239 || Timer: 0.0002 sec.
===> Epoch[48](329/521): Loss: 1.0652 || Timer: 0.0002 sec.
===> Epoch[48](330/521): Loss: 1.0753 || Timer: 0.0002 sec.
===> Epoch[48](331/521): Loss: 0.9749 || Timer: 0.0002 sec.
===> Epoch[48](332/521): Loss: 0.9642 || Timer: 0.0002 sec.
===> Epoch[48](333/521): Loss: 1.0244 || Timer: 0.0002 sec.
===> Epoch[48](334/521): Loss: 1.1924 || Timer: 0.0002 sec.
===> Epoch[48](335/521): Loss: 1.0041 || Timer: 0.0002 sec.
===> Epoch[48](336/521): Loss: 0.9755 || Timer: 0.0002 sec.
===> Epoch[48](337/521): Loss: 0.7882 || Timer: 0.0002 sec.
===> Epoch[48](338/521): Loss: 1.4328 || Timer: 0.0002 sec.
===> Epoch[48](339/521): Loss: 1.1511 || Timer: 0.0002 sec.
===> Epoch[48](340/521): Loss: 1.0737 || Timer: 0.0002 sec.
===> Epoch[48](341/521): Loss: 1.2708 || Timer: 0.0002 sec.
===> Epoch[48](342/521): Loss: 1.0061 ||

===> Epoch[48](463/521): Loss: 0.9321 || Timer: 0.0002 sec.
===> Epoch[48](464/521): Loss: 0.9219 || Timer: 0.0002 sec.
===> Epoch[48](465/521): Loss: 0.9374 || Timer: 0.0002 sec.
===> Epoch[48](466/521): Loss: 1.1414 || Timer: 0.0002 sec.
===> Epoch[48](467/521): Loss: 0.8819 || Timer: 0.0002 sec.
===> Epoch[48](468/521): Loss: 1.2697 || Timer: 0.0002 sec.
===> Epoch[48](469/521): Loss: 1.1055 || Timer: 0.0002 sec.
===> Epoch[48](470/521): Loss: 1.1801 || Timer: 0.0002 sec.
===> Epoch[48](471/521): Loss: 1.0187 || Timer: 0.0002 sec.
===> Epoch[48](472/521): Loss: 0.9553 || Timer: 0.0002 sec.
===> Epoch[48](473/521): Loss: 0.9356 || Timer: 0.0002 sec.
===> Epoch[48](474/521): Loss: 0.9938 || Timer: 0.0002 sec.
===> Epoch[48](475/521): Loss: 1.0080 || Timer: 0.0002 sec.
===> Epoch[48](476/521): Loss: 1.1397 || Timer: 0.0002 sec.
===> Epoch[48](477/521): Loss: 0.9860 || Timer: 0.0002 sec.
===> Epoch[48](478/521): Loss: 1.0612 || Timer: 0.0002 sec.
===> Epoch[48](479/521): Loss: 1.0933 ||

===> Epoch[49](79/521): Loss: 0.8061 || Timer: 0.0002 sec.
===> Epoch[49](80/521): Loss: 0.9411 || Timer: 0.0002 sec.
===> Epoch[49](81/521): Loss: 0.8341 || Timer: 0.0002 sec.
===> Epoch[49](82/521): Loss: 0.8954 || Timer: 0.0002 sec.
===> Epoch[49](83/521): Loss: 0.8980 || Timer: 0.0002 sec.
===> Epoch[49](84/521): Loss: 0.9922 || Timer: 0.0002 sec.
===> Epoch[49](85/521): Loss: 0.8444 || Timer: 0.0002 sec.
===> Epoch[49](86/521): Loss: 1.0417 || Timer: 0.0002 sec.
===> Epoch[49](87/521): Loss: 0.9368 || Timer: 0.0002 sec.
===> Epoch[49](88/521): Loss: 0.7884 || Timer: 0.0002 sec.
===> Epoch[49](89/521): Loss: 0.7482 || Timer: 0.0002 sec.
===> Epoch[49](90/521): Loss: 1.0167 || Timer: 0.0002 sec.
===> Epoch[49](91/521): Loss: 1.0351 || Timer: 0.0002 sec.
===> Epoch[49](92/521): Loss: 0.8826 || Timer: 0.0001 sec.
===> Epoch[49](93/521): Loss: 0.8911 || Timer: 0.0001 sec.
===> Epoch[49](94/521): Loss: 0.9961 || Timer: 0.0002 sec.
===> Epoch[49](95/521): Loss: 1.1448 || Timer: 0.0001 se