# Training ResNeXt

This Notebook will go through the process of training the ResNeXt model with an architecture that is optimized for the Cifar Dataset (as specified in [1] https://arxiv.org/pdf/1611.05431.pdf).

In [1]:
import os
import sys
import json
import shutil
import torch
import torch.nn.functional as F
import torchvision.datasets as dset
import torchvision.transforms as transforms
import time

In [2]:
sys.path.append("..") # Note, this line is needed only since this notebook is being run from inside 'bharat' directory
                      # If you run this notebook in the same folder as the the 'models-py-code' directory, remove this line
from pymodels.resnext import CifarResNeXt # This is where we actually refer to the model we wish to train

## Set all required parameters
Remember to change the `session_id` for each new model you wish to train or if you change of the other parameters

In [3]:
args={}

# Input Data Parameters
args['data_path'] = '/datasets/ee285s-public/'
args['dataset'] = 'cifar100' # The other option is 'cifar10'

# Optimization options
args['epochs'] = 150
args['start_epoch'] = 0 # Change only if necessary. Used for resuming sessions
args['batch_size'] = 96
args['learning_rate'] = 0.1
args['momentum'] = 0.9 # Momentum
args['decay'] = 0.0005 # Weight decay (L2 penalty)
args['test_bs'] = 64 # Test Batch Size
args['schedule'] = [75, 115] # Decrease learning rate at these epochs
args['gamma'] = 0.1 # LR is multiplied by gamma on schedule.

# Checkpoints and Session Parameters
args['save_dir'] = './' # Folder to save checkpoint file and best model file
args['load'] = 'resnext-0-checkpoint.pth' # Path to load Checkpoint file 
args['session_id'] = '0' # Remember to change the session id for each new model you train

# Architecture
args['depth'] = 29 # Model depth
args['cardinality'] = 8 # Model cardinality (group)
args['base_width'] = 64 # Number of channels in each group
args['widen_factor'] = 4 # Widen factor. 4 -> 64, 8 -> 128, ...

# Acceleration
args['ngpu'] = 1 # 0 = CPU
args['prefetch'] = 2 # Pre-fetching threads

# i/o
args['log_dir'] = './log/' # Log folder

# Create a Dictionary to describe the state every epoch
epoch_state = {'args' : args}

### Initialize Logging

In [4]:
if not os.path.isdir(args['log_dir']):
        os.makedirs(args['log_dir'])

log_file_name = 'resnext-{}-log.txt'.format(args['session_id'])
if args['load'] == '':
    log = open(os.path.join(args['log_dir'], log_file_name), 'w')
else:
    log = open(os.path.join(args['log_dir'], log_file_name), 'a')    
print('Exisiting log {} loaded.'.format(log_file_name))

Exisiting log resnext-0-log.txt loaded.


### Setup the Input Data

In [5]:
mean = [x / 255 for x in [125.3, 123.0, 113.9]]
std = [x / 255 for x in [63.0, 62.1, 66.7]]

train_transform = transforms.Compose(
    [transforms.RandomHorizontalFlip(),
     transforms.RandomCrop(32, padding=4),
     transforms.ToTensor(),
     transforms.Normalize(mean, std)])

test_transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(mean, std)])

if args['dataset'] == 'cifar10':
    train_data = dset.CIFAR10(args['data_path'], train=True, transform=train_transform, download=False)
    test_data = dset.CIFAR10(args['data_path'], train=False, transform=test_transform, download=False)
    nlabels = 10
else:
    train_data = dset.CIFAR100(args['data_path'], train=True, transform=train_transform, download=False)
    test_data = dset.CIFAR100(args['data_path'], train=False, transform=test_transform, download=False)
    nlabels = 100

train_loader = torch.utils.data.DataLoader(train_data, batch_size=args['batch_size'], shuffle=True,
                                                                       num_workers=args['prefetch'], pin_memory=True)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=args['test_bs'], shuffle=False,
                                                                     num_workers=args['prefetch'], pin_memory=True)

### Setup the Model, Criterion, and Optimizer

In [6]:
resnext_model = CifarResNeXt(args['cardinality'], args['depth'], nlabels, args['base_width'], args['widen_factor'])
print(resnext_model)
if args['ngpu'] > 1:
    resnext_model = torch.nn.DataParallel(resnext_model, device_ids=list(range(args['ngpu'])))

if args['ngpu'] > 0:
    resnext_model.cuda()

optimizer = torch.optim.SGD(resnext_model.parameters(), args['learning_rate'], momentum=args['momentum'],
                            weight_decay=args['decay'], nesterov=True)

CifarResNeXt(
  (conv_1_3x3): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn_1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
  (stage_1): Sequential(
    (stage_1_bottleneck_0): ResNeXtBottleneck(
      (conv_reduce): Conv2d(64, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn_reduce): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True)
      (conv_conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=8, bias=False)
      (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True)
      (conv_expand): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn_expand): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
      (shortcut): Sequential(
        (shortcut_conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (shortcut_bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True)
      )
    )
    (stage_1_bottleneck_1): ResNeXtBottleneck(
      (

### Setup the Train and Test Functions

In [7]:
def train(epoch):
    resnext_model.train()
    loss_avg = 0.0
    t0 = time.time()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = torch.autograd.Variable(data.cuda()), torch.autograd.Variable(target.cuda())

        # forward
        output = resnext_model(data)

        # backward
        optimizer.zero_grad()
        t0 = time.time()
        loss = F.cross_entropy(output, target)
        t1 = time.time()
        loss.backward()
        optimizer.step()

        # exponential moving average
        loss_avg = loss_avg * 0.2 + float(loss) * 0.8

        print("===> Epoch[{}]({}/{}): Loss: {:.4f} || Timer: {:.4f} sec.".format(epoch, batch_idx, len(train_loader), loss.data[0], (t1 - t0)))

    epoch_state['train_loss'] = loss_avg
    t1 = time.time()
    print("===> Epoch {} Complete: Avg. Loss: {:.4f} | Time: {:.4f}".format(epoch, loss_avg, (t1-t0)))

# test function (forward only)
def test():
    resnext_model.eval()
    loss_avg = 0.0
    correct = 0
    for batch_idx, (data, target) in enumerate(test_loader):
        data, target = torch.autograd.Variable(data.cuda()), torch.autograd.Variable(target.cuda())

        # forward
        output = resnext_model(data)
        loss = F.cross_entropy(output, target)

        # accuracy
        pred = output.data.max(1)[1]
        correct += float(pred.eq(target.data).sum())

        # test loss average
        loss_avg += float(loss)

    epoch_state['test_loss'] = loss_avg / len(test_loader)
    epoch_state['test_accuracy'] = correct / len(test_loader.dataset)

### Define a function to save a checkpoint

In [8]:
def save_checkpoint(state, is_best, filename='resnext-{}-checkpoint.pth'.format(args['session_id'])):
    filepath = os.path.join(args['save_dir'], filename)
    torch.save(state, filepath)
    if is_best:
        bestfilepath = os.path.join(args['save_dir'], 'model-best-resnext-{}.pth'.format(args['session_id']))
        shutil.copyfile(filepath, bestfilepath)

### Load and Resume from checkpoint (if set)

In [9]:
if not args['load'] == '':
    if os.path.isfile(args['load']):
        print("=> Loading Checkpoint '{}'".format(args['load']))
        checkpoint = torch.load(args['load'])
        args['start_epoch'] = checkpoint['epoch'] + 1
        args['learning_rate'] = checkpoint['learning_rate']
        resnext_model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("=> Loaded checkpoint '{}' (epoch {})"
              .format(args['load'], checkpoint['epoch']))
    else:
        print("=> No checkpoint found at '{}'".format(args['load']))

=> Loading Checkpoint 'resnext-0-checkpoint.pth'
=> Loaded checkpoint 'resnext-0-checkpoint.pth' (epoch 139)


### Main Loop

In [10]:
best_accuracy = 0.0

for epoch in range(args['start_epoch'], args['epochs']):
    if epoch in args['schedule']:
        args['learning_rate'] *= args['gamma']
        for param_group in optimizer.param_groups:
            param_group['lr'] = args['learning_rate']

    epoch_state['epoch'] = epoch
    train(epoch)
    test()
    if epoch_state['test_accuracy'] > best_accuracy:
        best_accuracy = epoch_state['test_accuracy']
        isbest = True
    
    save_checkpoint({
        'epoch' : epoch,
        'learning_rate' : args['learning_rate'],
        'state_dict' : resnext_model.state_dict(),
        'optimizer' : optimizer.state_dict()
    }, isbest)
    
    log.write('%s\n' % json.dumps(epoch_state))
    log.flush()
    #print(epoch_state)
    print("[Epoch {}] Best accuracy: {:.4f}".format(epoch, best_accuracy))

log.close()

===> Epoch[140](0/521): Loss: 0.0188 || Timer: 0.0006 sec.
===> Epoch[140](1/521): Loss: 0.0122 || Timer: 0.0002 sec.
===> Epoch[140](2/521): Loss: 0.0080 || Timer: 0.0002 sec.
===> Epoch[140](3/521): Loss: 0.0052 || Timer: 0.0001 sec.
===> Epoch[140](4/521): Loss: 0.0128 || Timer: 0.0002 sec.
===> Epoch[140](5/521): Loss: 0.0050 || Timer: 0.0002 sec.
===> Epoch[140](6/521): Loss: 0.0047 || Timer: 0.0002 sec.
===> Epoch[140](7/521): Loss: 0.0109 || Timer: 0.0002 sec.
===> Epoch[140](8/521): Loss: 0.0124 || Timer: 0.0001 sec.
===> Epoch[140](9/521): Loss: 0.0100 || Timer: 0.0001 sec.
===> Epoch[140](10/521): Loss: 0.0070 || Timer: 0.0001 sec.
===> Epoch[140](11/521): Loss: 0.0068 || Timer: 0.0001 sec.
===> Epoch[140](12/521): Loss: 0.0061 || Timer: 0.0001 sec.
===> Epoch[140](13/521): Loss: 0.0092 || Timer: 0.0001 sec.
===> Epoch[140](14/521): Loss: 0.0108 || Timer: 0.0002 sec.
===> Epoch[140](15/521): Loss: 0.0090 || Timer: 0.0001 sec.
===> Epoch[140](16/521): Loss: 0.0067 || Timer: 0.

===> Epoch[140](137/521): Loss: 0.0180 || Timer: 0.0002 sec.
===> Epoch[140](138/521): Loss: 0.0071 || Timer: 0.0002 sec.
===> Epoch[140](139/521): Loss: 0.0053 || Timer: 0.0001 sec.
===> Epoch[140](140/521): Loss: 0.0104 || Timer: 0.0001 sec.
===> Epoch[140](141/521): Loss: 0.0074 || Timer: 0.0001 sec.
===> Epoch[140](142/521): Loss: 0.0053 || Timer: 0.0002 sec.
===> Epoch[140](143/521): Loss: 0.0053 || Timer: 0.0001 sec.
===> Epoch[140](144/521): Loss: 0.0089 || Timer: 0.0002 sec.
===> Epoch[140](145/521): Loss: 0.0080 || Timer: 0.0003 sec.
===> Epoch[140](146/521): Loss: 0.0097 || Timer: 0.0002 sec.
===> Epoch[140](147/521): Loss: 0.0228 || Timer: 0.0002 sec.
===> Epoch[140](148/521): Loss: 0.0069 || Timer: 0.0002 sec.
===> Epoch[140](149/521): Loss: 0.0098 || Timer: 0.0002 sec.
===> Epoch[140](150/521): Loss: 0.0070 || Timer: 0.0002 sec.
===> Epoch[140](151/521): Loss: 0.0072 || Timer: 0.0002 sec.
===> Epoch[140](152/521): Loss: 0.0132 || Timer: 0.0002 sec.
===> Epoch[140](153/521)

===> Epoch[140](272/521): Loss: 0.0072 || Timer: 0.0003 sec.
===> Epoch[140](273/521): Loss: 0.0073 || Timer: 0.0003 sec.
===> Epoch[140](274/521): Loss: 0.0083 || Timer: 0.0003 sec.
===> Epoch[140](275/521): Loss: 0.0140 || Timer: 0.0003 sec.
===> Epoch[140](276/521): Loss: 0.0099 || Timer: 0.0003 sec.
===> Epoch[140](277/521): Loss: 0.0097 || Timer: 0.0003 sec.
===> Epoch[140](278/521): Loss: 0.0079 || Timer: 0.0003 sec.
===> Epoch[140](279/521): Loss: 0.0070 || Timer: 0.0003 sec.
===> Epoch[140](280/521): Loss: 0.0085 || Timer: 0.0003 sec.
===> Epoch[140](281/521): Loss: 0.0181 || Timer: 0.0003 sec.
===> Epoch[140](282/521): Loss: 0.0065 || Timer: 0.0003 sec.
===> Epoch[140](283/521): Loss: 0.0107 || Timer: 0.0003 sec.
===> Epoch[140](284/521): Loss: 0.0092 || Timer: 0.0003 sec.
===> Epoch[140](285/521): Loss: 0.0116 || Timer: 0.0003 sec.
===> Epoch[140](286/521): Loss: 0.0056 || Timer: 0.0003 sec.
===> Epoch[140](287/521): Loss: 0.0129 || Timer: 0.0002 sec.
===> Epoch[140](288/521)

===> Epoch[140](407/521): Loss: 0.0074 || Timer: 0.0001 sec.
===> Epoch[140](408/521): Loss: 0.0087 || Timer: 0.0002 sec.
===> Epoch[140](409/521): Loss: 0.0079 || Timer: 0.0002 sec.
===> Epoch[140](410/521): Loss: 0.0092 || Timer: 0.0002 sec.
===> Epoch[140](411/521): Loss: 0.0079 || Timer: 0.0002 sec.
===> Epoch[140](412/521): Loss: 0.0148 || Timer: 0.0002 sec.
===> Epoch[140](413/521): Loss: 0.0088 || Timer: 0.0002 sec.
===> Epoch[140](414/521): Loss: 0.0136 || Timer: 0.0002 sec.
===> Epoch[140](415/521): Loss: 0.0098 || Timer: 0.0002 sec.
===> Epoch[140](416/521): Loss: 0.0081 || Timer: 0.0001 sec.
===> Epoch[140](417/521): Loss: 0.0089 || Timer: 0.0002 sec.
===> Epoch[140](418/521): Loss: 0.0060 || Timer: 0.0002 sec.
===> Epoch[140](419/521): Loss: 0.0090 || Timer: 0.0001 sec.
===> Epoch[140](420/521): Loss: 0.0072 || Timer: 0.0001 sec.
===> Epoch[140](421/521): Loss: 0.0154 || Timer: 0.0002 sec.
===> Epoch[140](422/521): Loss: 0.0069 || Timer: 0.0001 sec.
===> Epoch[140](423/521)

===> Epoch[141](20/521): Loss: 0.0118 || Timer: 0.0002 sec.
===> Epoch[141](21/521): Loss: 0.0057 || Timer: 0.0002 sec.
===> Epoch[141](22/521): Loss: 0.0277 || Timer: 0.0002 sec.
===> Epoch[141](23/521): Loss: 0.0050 || Timer: 0.0002 sec.
===> Epoch[141](24/521): Loss: 0.0078 || Timer: 0.0002 sec.
===> Epoch[141](25/521): Loss: 0.0134 || Timer: 0.0002 sec.
===> Epoch[141](26/521): Loss: 0.0058 || Timer: 0.0002 sec.
===> Epoch[141](27/521): Loss: 0.0088 || Timer: 0.0002 sec.
===> Epoch[141](28/521): Loss: 0.0091 || Timer: 0.0002 sec.
===> Epoch[141](29/521): Loss: 0.0238 || Timer: 0.0002 sec.
===> Epoch[141](30/521): Loss: 0.0074 || Timer: 0.0002 sec.
===> Epoch[141](31/521): Loss: 0.0076 || Timer: 0.0002 sec.
===> Epoch[141](32/521): Loss: 0.0077 || Timer: 0.0002 sec.
===> Epoch[141](33/521): Loss: 0.0069 || Timer: 0.0002 sec.
===> Epoch[141](34/521): Loss: 0.0095 || Timer: 0.0002 sec.
===> Epoch[141](35/521): Loss: 0.0068 || Timer: 0.0002 sec.
===> Epoch[141](36/521): Loss: 0.0080 ||

===> Epoch[141](156/521): Loss: 0.0103 || Timer: 0.0001 sec.
===> Epoch[141](157/521): Loss: 0.0157 || Timer: 0.0001 sec.
===> Epoch[141](158/521): Loss: 0.0070 || Timer: 0.0002 sec.
===> Epoch[141](159/521): Loss: 0.0072 || Timer: 0.0001 sec.
===> Epoch[141](160/521): Loss: 0.0087 || Timer: 0.0002 sec.
===> Epoch[141](161/521): Loss: 0.0099 || Timer: 0.0002 sec.
===> Epoch[141](162/521): Loss: 0.0086 || Timer: 0.0002 sec.
===> Epoch[141](163/521): Loss: 0.0055 || Timer: 0.0001 sec.
===> Epoch[141](164/521): Loss: 0.0073 || Timer: 0.0002 sec.
===> Epoch[141](165/521): Loss: 0.0080 || Timer: 0.0001 sec.
===> Epoch[141](166/521): Loss: 0.0056 || Timer: 0.0002 sec.
===> Epoch[141](167/521): Loss: 0.0073 || Timer: 0.0002 sec.
===> Epoch[141](168/521): Loss: 0.0077 || Timer: 0.0002 sec.
===> Epoch[141](169/521): Loss: 0.0091 || Timer: 0.0002 sec.
===> Epoch[141](170/521): Loss: 0.0072 || Timer: 0.0002 sec.
===> Epoch[141](171/521): Loss: 0.0086 || Timer: 0.0001 sec.
===> Epoch[141](172/521)

===> Epoch[141](291/521): Loss: 0.0065 || Timer: 0.0001 sec.
===> Epoch[141](292/521): Loss: 0.0077 || Timer: 0.0002 sec.
===> Epoch[141](293/521): Loss: 0.0136 || Timer: 0.0002 sec.
===> Epoch[141](294/521): Loss: 0.0089 || Timer: 0.0001 sec.
===> Epoch[141](295/521): Loss: 0.0055 || Timer: 0.0002 sec.
===> Epoch[141](296/521): Loss: 0.0054 || Timer: 0.0002 sec.
===> Epoch[141](297/521): Loss: 0.0060 || Timer: 0.0002 sec.
===> Epoch[141](298/521): Loss: 0.0092 || Timer: 0.0002 sec.
===> Epoch[141](299/521): Loss: 0.0124 || Timer: 0.0002 sec.
===> Epoch[141](300/521): Loss: 0.0088 || Timer: 0.0002 sec.
===> Epoch[141](301/521): Loss: 0.0083 || Timer: 0.0002 sec.
===> Epoch[141](302/521): Loss: 0.0114 || Timer: 0.0002 sec.
===> Epoch[141](303/521): Loss: 0.0075 || Timer: 0.0002 sec.
===> Epoch[141](304/521): Loss: 0.0142 || Timer: 0.0002 sec.
===> Epoch[141](305/521): Loss: 0.0117 || Timer: 0.0002 sec.
===> Epoch[141](306/521): Loss: 0.0062 || Timer: 0.0002 sec.
===> Epoch[141](307/521)

===> Epoch[141](426/521): Loss: 0.0059 || Timer: 0.0002 sec.
===> Epoch[141](427/521): Loss: 0.0112 || Timer: 0.0001 sec.
===> Epoch[141](428/521): Loss: 0.0060 || Timer: 0.0001 sec.
===> Epoch[141](429/521): Loss: 0.0104 || Timer: 0.0002 sec.
===> Epoch[141](430/521): Loss: 0.0133 || Timer: 0.0001 sec.
===> Epoch[141](431/521): Loss: 0.0134 || Timer: 0.0001 sec.
===> Epoch[141](432/521): Loss: 0.0056 || Timer: 0.0001 sec.
===> Epoch[141](433/521): Loss: 0.0175 || Timer: 0.0001 sec.
===> Epoch[141](434/521): Loss: 0.0114 || Timer: 0.0002 sec.
===> Epoch[141](435/521): Loss: 0.0107 || Timer: 0.0002 sec.
===> Epoch[141](436/521): Loss: 0.0088 || Timer: 0.0002 sec.
===> Epoch[141](437/521): Loss: 0.0042 || Timer: 0.0002 sec.
===> Epoch[141](438/521): Loss: 0.0153 || Timer: 0.0002 sec.
===> Epoch[141](439/521): Loss: 0.0112 || Timer: 0.0001 sec.
===> Epoch[141](440/521): Loss: 0.0100 || Timer: 0.0002 sec.
===> Epoch[141](441/521): Loss: 0.0057 || Timer: 0.0002 sec.
===> Epoch[141](442/521)

===> Epoch[142](39/521): Loss: 0.0058 || Timer: 0.0001 sec.
===> Epoch[142](40/521): Loss: 0.0097 || Timer: 0.0002 sec.
===> Epoch[142](41/521): Loss: 0.0098 || Timer: 0.0001 sec.
===> Epoch[142](42/521): Loss: 0.0144 || Timer: 0.0002 sec.
===> Epoch[142](43/521): Loss: 0.0065 || Timer: 0.0002 sec.
===> Epoch[142](44/521): Loss: 0.0085 || Timer: 0.0003 sec.
===> Epoch[142](45/521): Loss: 0.0142 || Timer: 0.0002 sec.
===> Epoch[142](46/521): Loss: 0.0108 || Timer: 0.0002 sec.
===> Epoch[142](47/521): Loss: 0.0109 || Timer: 0.0002 sec.
===> Epoch[142](48/521): Loss: 0.0081 || Timer: 0.0002 sec.
===> Epoch[142](49/521): Loss: 0.0077 || Timer: 0.0002 sec.
===> Epoch[142](50/521): Loss: 0.0096 || Timer: 0.0002 sec.
===> Epoch[142](51/521): Loss: 0.0082 || Timer: 0.0002 sec.
===> Epoch[142](52/521): Loss: 0.0059 || Timer: 0.0002 sec.
===> Epoch[142](53/521): Loss: 0.0062 || Timer: 0.0001 sec.
===> Epoch[142](54/521): Loss: 0.0073 || Timer: 0.0001 sec.
===> Epoch[142](55/521): Loss: 0.0159 ||

===> Epoch[142](175/521): Loss: 0.0094 || Timer: 0.0002 sec.
===> Epoch[142](176/521): Loss: 0.0074 || Timer: 0.0002 sec.
===> Epoch[142](177/521): Loss: 0.0130 || Timer: 0.0002 sec.
===> Epoch[142](178/521): Loss: 0.0062 || Timer: 0.0002 sec.
===> Epoch[142](179/521): Loss: 0.0069 || Timer: 0.0002 sec.
===> Epoch[142](180/521): Loss: 0.0210 || Timer: 0.0002 sec.
===> Epoch[142](181/521): Loss: 0.0083 || Timer: 0.0002 sec.
===> Epoch[142](182/521): Loss: 0.0095 || Timer: 0.0002 sec.
===> Epoch[142](183/521): Loss: 0.0151 || Timer: 0.0002 sec.
===> Epoch[142](184/521): Loss: 0.0108 || Timer: 0.0002 sec.
===> Epoch[142](185/521): Loss: 0.0059 || Timer: 0.0002 sec.
===> Epoch[142](186/521): Loss: 0.0071 || Timer: 0.0002 sec.
===> Epoch[142](187/521): Loss: 0.0476 || Timer: 0.0002 sec.
===> Epoch[142](188/521): Loss: 0.0050 || Timer: 0.0002 sec.
===> Epoch[142](189/521): Loss: 0.0091 || Timer: 0.0002 sec.
===> Epoch[142](190/521): Loss: 0.0077 || Timer: 0.0002 sec.
===> Epoch[142](191/521)

===> Epoch[142](310/521): Loss: 0.0059 || Timer: 0.0002 sec.
===> Epoch[142](311/521): Loss: 0.0106 || Timer: 0.0003 sec.
===> Epoch[142](312/521): Loss: 0.0116 || Timer: 0.0003 sec.
===> Epoch[142](313/521): Loss: 0.0093 || Timer: 0.0003 sec.
===> Epoch[142](314/521): Loss: 0.0079 || Timer: 0.0004 sec.
===> Epoch[142](315/521): Loss: 0.0080 || Timer: 0.0003 sec.
===> Epoch[142](316/521): Loss: 0.0054 || Timer: 0.0003 sec.
===> Epoch[142](317/521): Loss: 0.0103 || Timer: 0.0003 sec.
===> Epoch[142](318/521): Loss: 0.0104 || Timer: 0.0002 sec.
===> Epoch[142](319/521): Loss: 0.0062 || Timer: 0.0002 sec.
===> Epoch[142](320/521): Loss: 0.0111 || Timer: 0.0003 sec.
===> Epoch[142](321/521): Loss: 0.0131 || Timer: 0.0003 sec.
===> Epoch[142](322/521): Loss: 0.0108 || Timer: 0.0004 sec.
===> Epoch[142](323/521): Loss: 0.0098 || Timer: 0.0004 sec.
===> Epoch[142](324/521): Loss: 0.0057 || Timer: 0.0004 sec.
===> Epoch[142](325/521): Loss: 0.0188 || Timer: 0.0003 sec.
===> Epoch[142](326/521)

===> Epoch[142](445/521): Loss: 0.0071 || Timer: 0.0002 sec.
===> Epoch[142](446/521): Loss: 0.0085 || Timer: 0.0002 sec.
===> Epoch[142](447/521): Loss: 0.0067 || Timer: 0.0002 sec.
===> Epoch[142](448/521): Loss: 0.0074 || Timer: 0.0002 sec.
===> Epoch[142](449/521): Loss: 0.0170 || Timer: 0.0002 sec.
===> Epoch[142](450/521): Loss: 0.0120 || Timer: 0.0002 sec.
===> Epoch[142](451/521): Loss: 0.0099 || Timer: 0.0002 sec.
===> Epoch[142](452/521): Loss: 0.0094 || Timer: 0.0002 sec.
===> Epoch[142](453/521): Loss: 0.0067 || Timer: 0.0002 sec.
===> Epoch[142](454/521): Loss: 0.0288 || Timer: 0.0002 sec.
===> Epoch[142](455/521): Loss: 0.0053 || Timer: 0.0002 sec.
===> Epoch[142](456/521): Loss: 0.0141 || Timer: 0.0002 sec.
===> Epoch[142](457/521): Loss: 0.0094 || Timer: 0.0002 sec.
===> Epoch[142](458/521): Loss: 0.0113 || Timer: 0.0002 sec.
===> Epoch[142](459/521): Loss: 0.0136 || Timer: 0.0002 sec.
===> Epoch[142](460/521): Loss: 0.0088 || Timer: 0.0002 sec.
===> Epoch[142](461/521)

===> Epoch[143](58/521): Loss: 0.0073 || Timer: 0.0002 sec.
===> Epoch[143](59/521): Loss: 0.0092 || Timer: 0.0002 sec.
===> Epoch[143](60/521): Loss: 0.0069 || Timer: 0.0002 sec.
===> Epoch[143](61/521): Loss: 0.0052 || Timer: 0.0002 sec.
===> Epoch[143](62/521): Loss: 0.0120 || Timer: 0.0002 sec.
===> Epoch[143](63/521): Loss: 0.0057 || Timer: 0.0002 sec.
===> Epoch[143](64/521): Loss: 0.0078 || Timer: 0.0002 sec.
===> Epoch[143](65/521): Loss: 0.0116 || Timer: 0.0002 sec.
===> Epoch[143](66/521): Loss: 0.0105 || Timer: 0.0002 sec.
===> Epoch[143](67/521): Loss: 0.0066 || Timer: 0.0002 sec.
===> Epoch[143](68/521): Loss: 0.0063 || Timer: 0.0002 sec.
===> Epoch[143](69/521): Loss: 0.0110 || Timer: 0.0002 sec.
===> Epoch[143](70/521): Loss: 0.0135 || Timer: 0.0002 sec.
===> Epoch[143](71/521): Loss: 0.0069 || Timer: 0.0002 sec.
===> Epoch[143](72/521): Loss: 0.0102 || Timer: 0.0002 sec.
===> Epoch[143](73/521): Loss: 0.0058 || Timer: 0.0002 sec.
===> Epoch[143](74/521): Loss: 0.0094 ||

===> Epoch[143](193/521): Loss: 0.0126 || Timer: 0.0002 sec.
===> Epoch[143](194/521): Loss: 0.0079 || Timer: 0.0002 sec.
===> Epoch[143](195/521): Loss: 0.0081 || Timer: 0.0001 sec.
===> Epoch[143](196/521): Loss: 0.0073 || Timer: 0.0001 sec.
===> Epoch[143](197/521): Loss: 0.0089 || Timer: 0.0001 sec.
===> Epoch[143](198/521): Loss: 0.0093 || Timer: 0.0001 sec.
===> Epoch[143](199/521): Loss: 0.0120 || Timer: 0.0001 sec.
===> Epoch[143](200/521): Loss: 0.0102 || Timer: 0.0002 sec.
===> Epoch[143](201/521): Loss: 0.0135 || Timer: 0.0001 sec.
===> Epoch[143](202/521): Loss: 0.0045 || Timer: 0.0001 sec.
===> Epoch[143](203/521): Loss: 0.0081 || Timer: 0.0002 sec.
===> Epoch[143](204/521): Loss: 0.0182 || Timer: 0.0002 sec.
===> Epoch[143](205/521): Loss: 0.0080 || Timer: 0.0002 sec.
===> Epoch[143](206/521): Loss: 0.0037 || Timer: 0.0001 sec.
===> Epoch[143](207/521): Loss: 0.0077 || Timer: 0.0002 sec.
===> Epoch[143](208/521): Loss: 0.0086 || Timer: 0.0002 sec.
===> Epoch[143](209/521)

===> Epoch[143](328/521): Loss: 0.0080 || Timer: 0.0001 sec.
===> Epoch[143](329/521): Loss: 0.0072 || Timer: 0.0002 sec.
===> Epoch[143](330/521): Loss: 0.0067 || Timer: 0.0002 sec.
===> Epoch[143](331/521): Loss: 0.0093 || Timer: 0.0002 sec.
===> Epoch[143](332/521): Loss: 0.0073 || Timer: 0.0002 sec.
===> Epoch[143](333/521): Loss: 0.0082 || Timer: 0.0002 sec.
===> Epoch[143](334/521): Loss: 0.0152 || Timer: 0.0001 sec.
===> Epoch[143](335/521): Loss: 0.0058 || Timer: 0.0001 sec.
===> Epoch[143](336/521): Loss: 0.0084 || Timer: 0.0002 sec.
===> Epoch[143](337/521): Loss: 0.0073 || Timer: 0.0001 sec.
===> Epoch[143](338/521): Loss: 0.0104 || Timer: 0.0002 sec.
===> Epoch[143](339/521): Loss: 0.0135 || Timer: 0.0002 sec.
===> Epoch[143](340/521): Loss: 0.0057 || Timer: 0.0002 sec.
===> Epoch[143](341/521): Loss: 0.0063 || Timer: 0.0002 sec.
===> Epoch[143](342/521): Loss: 0.0083 || Timer: 0.0001 sec.
===> Epoch[143](343/521): Loss: 0.0074 || Timer: 0.0002 sec.
===> Epoch[143](344/521)

===> Epoch[143](463/521): Loss: 0.0070 || Timer: 0.0001 sec.
===> Epoch[143](464/521): Loss: 0.0102 || Timer: 0.0001 sec.
===> Epoch[143](465/521): Loss: 0.0060 || Timer: 0.0001 sec.
===> Epoch[143](466/521): Loss: 0.0056 || Timer: 0.0001 sec.
===> Epoch[143](467/521): Loss: 0.0145 || Timer: 0.0001 sec.
===> Epoch[143](468/521): Loss: 0.0062 || Timer: 0.0001 sec.
===> Epoch[143](469/521): Loss: 0.0089 || Timer: 0.0001 sec.
===> Epoch[143](470/521): Loss: 0.0070 || Timer: 0.0001 sec.
===> Epoch[143](471/521): Loss: 0.0103 || Timer: 0.0001 sec.
===> Epoch[143](472/521): Loss: 0.0090 || Timer: 0.0002 sec.
===> Epoch[143](473/521): Loss: 0.0098 || Timer: 0.0002 sec.
===> Epoch[143](474/521): Loss: 0.0083 || Timer: 0.0001 sec.
===> Epoch[143](475/521): Loss: 0.0093 || Timer: 0.0002 sec.
===> Epoch[143](476/521): Loss: 0.0090 || Timer: 0.0001 sec.
===> Epoch[143](477/521): Loss: 0.0121 || Timer: 0.0001 sec.
===> Epoch[143](478/521): Loss: 0.0108 || Timer: 0.0002 sec.
===> Epoch[143](479/521)

===> Epoch[144](77/521): Loss: 0.0080 || Timer: 0.0001 sec.
===> Epoch[144](78/521): Loss: 0.0071 || Timer: 0.0002 sec.
===> Epoch[144](79/521): Loss: 0.0078 || Timer: 0.0001 sec.
===> Epoch[144](80/521): Loss: 0.0069 || Timer: 0.0001 sec.
===> Epoch[144](81/521): Loss: 0.0110 || Timer: 0.0002 sec.
===> Epoch[144](82/521): Loss: 0.0076 || Timer: 0.0001 sec.
===> Epoch[144](83/521): Loss: 0.0078 || Timer: 0.0002 sec.
===> Epoch[144](84/521): Loss: 0.0073 || Timer: 0.0002 sec.
===> Epoch[144](85/521): Loss: 0.0138 || Timer: 0.0002 sec.
===> Epoch[144](86/521): Loss: 0.0117 || Timer: 0.0002 sec.
===> Epoch[144](87/521): Loss: 0.0162 || Timer: 0.0001 sec.
===> Epoch[144](88/521): Loss: 0.0069 || Timer: 0.0001 sec.
===> Epoch[144](89/521): Loss: 0.0097 || Timer: 0.0001 sec.
===> Epoch[144](90/521): Loss: 0.0096 || Timer: 0.0002 sec.
===> Epoch[144](91/521): Loss: 0.0084 || Timer: 0.0002 sec.
===> Epoch[144](92/521): Loss: 0.0078 || Timer: 0.0001 sec.
===> Epoch[144](93/521): Loss: 0.0080 ||

===> Epoch[144](212/521): Loss: 0.0136 || Timer: 0.0003 sec.
===> Epoch[144](213/521): Loss: 0.0063 || Timer: 0.0002 sec.
===> Epoch[144](214/521): Loss: 0.0118 || Timer: 0.0003 sec.
===> Epoch[144](215/521): Loss: 0.0090 || Timer: 0.0002 sec.
===> Epoch[144](216/521): Loss: 0.0077 || Timer: 0.0003 sec.
===> Epoch[144](217/521): Loss: 0.0103 || Timer: 0.0002 sec.
===> Epoch[144](218/521): Loss: 0.0079 || Timer: 0.0003 sec.
===> Epoch[144](219/521): Loss: 0.0082 || Timer: 0.0002 sec.
===> Epoch[144](220/521): Loss: 0.0118 || Timer: 0.0002 sec.
===> Epoch[144](221/521): Loss: 0.0086 || Timer: 0.0001 sec.
===> Epoch[144](222/521): Loss: 0.0064 || Timer: 0.0002 sec.
===> Epoch[144](223/521): Loss: 0.0089 || Timer: 0.0002 sec.
===> Epoch[144](224/521): Loss: 0.0093 || Timer: 0.0003 sec.
===> Epoch[144](225/521): Loss: 0.0064 || Timer: 0.0002 sec.
===> Epoch[144](226/521): Loss: 0.0131 || Timer: 0.0003 sec.
===> Epoch[144](227/521): Loss: 0.0095 || Timer: 0.0002 sec.
===> Epoch[144](228/521)

===> Epoch[144](347/521): Loss: 0.0124 || Timer: 0.0002 sec.
===> Epoch[144](348/521): Loss: 0.0159 || Timer: 0.0002 sec.
===> Epoch[144](349/521): Loss: 0.0086 || Timer: 0.0002 sec.
===> Epoch[144](350/521): Loss: 0.0097 || Timer: 0.0002 sec.
===> Epoch[144](351/521): Loss: 0.0085 || Timer: 0.0002 sec.
===> Epoch[144](352/521): Loss: 0.0097 || Timer: 0.0002 sec.
===> Epoch[144](353/521): Loss: 0.0112 || Timer: 0.0002 sec.
===> Epoch[144](354/521): Loss: 0.0086 || Timer: 0.0001 sec.
===> Epoch[144](355/521): Loss: 0.0090 || Timer: 0.0001 sec.
===> Epoch[144](356/521): Loss: 0.0085 || Timer: 0.0003 sec.
===> Epoch[144](357/521): Loss: 0.0084 || Timer: 0.0002 sec.
===> Epoch[144](358/521): Loss: 0.0078 || Timer: 0.0002 sec.
===> Epoch[144](359/521): Loss: 0.0143 || Timer: 0.0002 sec.
===> Epoch[144](360/521): Loss: 0.0069 || Timer: 0.0002 sec.
===> Epoch[144](361/521): Loss: 0.0080 || Timer: 0.0002 sec.
===> Epoch[144](362/521): Loss: 0.0099 || Timer: 0.0002 sec.
===> Epoch[144](363/521)

===> Epoch[144](482/521): Loss: 0.0056 || Timer: 0.0002 sec.
===> Epoch[144](483/521): Loss: 0.0080 || Timer: 0.0002 sec.
===> Epoch[144](484/521): Loss: 0.0104 || Timer: 0.0002 sec.
===> Epoch[144](485/521): Loss: 0.0091 || Timer: 0.0002 sec.
===> Epoch[144](486/521): Loss: 0.0123 || Timer: 0.0002 sec.
===> Epoch[144](487/521): Loss: 0.0069 || Timer: 0.0002 sec.
===> Epoch[144](488/521): Loss: 0.0115 || Timer: 0.0002 sec.
===> Epoch[144](489/521): Loss: 0.0081 || Timer: 0.0002 sec.
===> Epoch[144](490/521): Loss: 0.0085 || Timer: 0.0002 sec.
===> Epoch[144](491/521): Loss: 0.0092 || Timer: 0.0002 sec.
===> Epoch[144](492/521): Loss: 0.0058 || Timer: 0.0002 sec.
===> Epoch[144](493/521): Loss: 0.0089 || Timer: 0.0002 sec.
===> Epoch[144](494/521): Loss: 0.0173 || Timer: 0.0002 sec.
===> Epoch[144](495/521): Loss: 0.0127 || Timer: 0.0002 sec.
===> Epoch[144](496/521): Loss: 0.0360 || Timer: 0.0002 sec.
===> Epoch[144](497/521): Loss: 0.0073 || Timer: 0.0002 sec.
===> Epoch[144](498/521)

===> Epoch[145](96/521): Loss: 0.0057 || Timer: 0.0002 sec.
===> Epoch[145](97/521): Loss: 0.0106 || Timer: 0.0002 sec.
===> Epoch[145](98/521): Loss: 0.0135 || Timer: 0.0002 sec.
===> Epoch[145](99/521): Loss: 0.0073 || Timer: 0.0002 sec.
===> Epoch[145](100/521): Loss: 0.0069 || Timer: 0.0002 sec.
===> Epoch[145](101/521): Loss: 0.0079 || Timer: 0.0002 sec.
===> Epoch[145](102/521): Loss: 0.0104 || Timer: 0.0002 sec.
===> Epoch[145](103/521): Loss: 0.0089 || Timer: 0.0002 sec.
===> Epoch[145](104/521): Loss: 0.0067 || Timer: 0.0002 sec.
===> Epoch[145](105/521): Loss: 0.0093 || Timer: 0.0002 sec.
===> Epoch[145](106/521): Loss: 0.0145 || Timer: 0.0002 sec.
===> Epoch[145](107/521): Loss: 0.0066 || Timer: 0.0002 sec.
===> Epoch[145](108/521): Loss: 0.0046 || Timer: 0.0002 sec.
===> Epoch[145](109/521): Loss: 0.0129 || Timer: 0.0002 sec.
===> Epoch[145](110/521): Loss: 0.0098 || Timer: 0.0003 sec.
===> Epoch[145](111/521): Loss: 0.0094 || Timer: 0.0002 sec.
===> Epoch[145](112/521): Lo

===> Epoch[145](231/521): Loss: 0.0155 || Timer: 0.0001 sec.
===> Epoch[145](232/521): Loss: 0.0055 || Timer: 0.0002 sec.
===> Epoch[145](233/521): Loss: 0.0063 || Timer: 0.0002 sec.
===> Epoch[145](234/521): Loss: 0.0092 || Timer: 0.0002 sec.
===> Epoch[145](235/521): Loss: 0.0153 || Timer: 0.0001 sec.
===> Epoch[145](236/521): Loss: 0.0158 || Timer: 0.0002 sec.
===> Epoch[145](237/521): Loss: 0.0093 || Timer: 0.0001 sec.
===> Epoch[145](238/521): Loss: 0.0076 || Timer: 0.0002 sec.
===> Epoch[145](239/521): Loss: 0.0063 || Timer: 0.0002 sec.
===> Epoch[145](240/521): Loss: 0.0101 || Timer: 0.0001 sec.
===> Epoch[145](241/521): Loss: 0.0142 || Timer: 0.0002 sec.
===> Epoch[145](242/521): Loss: 0.0071 || Timer: 0.0002 sec.
===> Epoch[145](243/521): Loss: 0.0136 || Timer: 0.0002 sec.
===> Epoch[145](244/521): Loss: 0.0096 || Timer: 0.0002 sec.
===> Epoch[145](245/521): Loss: 0.0160 || Timer: 0.0002 sec.
===> Epoch[145](246/521): Loss: 0.0125 || Timer: 0.0002 sec.
===> Epoch[145](247/521)

===> Epoch[145](366/521): Loss: 0.0097 || Timer: 0.0001 sec.
===> Epoch[145](367/521): Loss: 0.0108 || Timer: 0.0002 sec.
===> Epoch[145](368/521): Loss: 0.0071 || Timer: 0.0002 sec.
===> Epoch[145](369/521): Loss: 0.0062 || Timer: 0.0002 sec.
===> Epoch[145](370/521): Loss: 0.0104 || Timer: 0.0002 sec.
===> Epoch[145](371/521): Loss: 0.0069 || Timer: 0.0002 sec.
===> Epoch[145](372/521): Loss: 0.0074 || Timer: 0.0002 sec.
===> Epoch[145](373/521): Loss: 0.0057 || Timer: 0.0002 sec.
===> Epoch[145](374/521): Loss: 0.0087 || Timer: 0.0001 sec.
===> Epoch[145](375/521): Loss: 0.0095 || Timer: 0.0002 sec.
===> Epoch[145](376/521): Loss: 0.0082 || Timer: 0.0001 sec.
===> Epoch[145](377/521): Loss: 0.0060 || Timer: 0.0001 sec.
===> Epoch[145](378/521): Loss: 0.0069 || Timer: 0.0001 sec.
===> Epoch[145](379/521): Loss: 0.0103 || Timer: 0.0002 sec.
===> Epoch[145](380/521): Loss: 0.0067 || Timer: 0.0001 sec.
===> Epoch[145](381/521): Loss: 0.0104 || Timer: 0.0001 sec.
===> Epoch[145](382/521)

===> Epoch[145](501/521): Loss: 0.0105 || Timer: 0.0002 sec.
===> Epoch[145](502/521): Loss: 0.0087 || Timer: 0.0001 sec.
===> Epoch[145](503/521): Loss: 0.0190 || Timer: 0.0001 sec.
===> Epoch[145](504/521): Loss: 0.0145 || Timer: 0.0001 sec.
===> Epoch[145](505/521): Loss: 0.0063 || Timer: 0.0001 sec.
===> Epoch[145](506/521): Loss: 0.0071 || Timer: 0.0001 sec.
===> Epoch[145](507/521): Loss: 0.0069 || Timer: 0.0001 sec.
===> Epoch[145](508/521): Loss: 0.0095 || Timer: 0.0002 sec.
===> Epoch[145](509/521): Loss: 0.0103 || Timer: 0.0001 sec.
===> Epoch[145](510/521): Loss: 0.0081 || Timer: 0.0001 sec.
===> Epoch[145](511/521): Loss: 0.0083 || Timer: 0.0001 sec.
===> Epoch[145](512/521): Loss: 0.0099 || Timer: 0.0001 sec.
===> Epoch[145](513/521): Loss: 0.0120 || Timer: 0.0001 sec.
===> Epoch[145](514/521): Loss: 0.0078 || Timer: 0.0001 sec.
===> Epoch[145](515/521): Loss: 0.0122 || Timer: 0.0001 sec.
===> Epoch[145](516/521): Loss: 0.0090 || Timer: 0.0002 sec.
===> Epoch[145](517/521)

===> Epoch[146](115/521): Loss: 0.0041 || Timer: 0.0002 sec.
===> Epoch[146](116/521): Loss: 0.0076 || Timer: 0.0002 sec.
===> Epoch[146](117/521): Loss: 0.0137 || Timer: 0.0002 sec.
===> Epoch[146](118/521): Loss: 0.0099 || Timer: 0.0001 sec.
===> Epoch[146](119/521): Loss: 0.0119 || Timer: 0.0002 sec.
===> Epoch[146](120/521): Loss: 0.0067 || Timer: 0.0002 sec.
===> Epoch[146](121/521): Loss: 0.0076 || Timer: 0.0002 sec.
===> Epoch[146](122/521): Loss: 0.0081 || Timer: 0.0002 sec.
===> Epoch[146](123/521): Loss: 0.0089 || Timer: 0.0002 sec.
===> Epoch[146](124/521): Loss: 0.0146 || Timer: 0.0002 sec.
===> Epoch[146](125/521): Loss: 0.0128 || Timer: 0.0002 sec.
===> Epoch[146](126/521): Loss: 0.0063 || Timer: 0.0002 sec.
===> Epoch[146](127/521): Loss: 0.0058 || Timer: 0.0002 sec.
===> Epoch[146](128/521): Loss: 0.0095 || Timer: 0.0002 sec.
===> Epoch[146](129/521): Loss: 0.0059 || Timer: 0.0002 sec.
===> Epoch[146](130/521): Loss: 0.0072 || Timer: 0.0002 sec.
===> Epoch[146](131/521)

===> Epoch[146](250/521): Loss: 0.0115 || Timer: 0.0002 sec.
===> Epoch[146](251/521): Loss: 0.0071 || Timer: 0.0002 sec.
===> Epoch[146](252/521): Loss: 0.0091 || Timer: 0.0002 sec.
===> Epoch[146](253/521): Loss: 0.0111 || Timer: 0.0002 sec.
===> Epoch[146](254/521): Loss: 0.0100 || Timer: 0.0002 sec.
===> Epoch[146](255/521): Loss: 0.0073 || Timer: 0.0002 sec.
===> Epoch[146](256/521): Loss: 0.0100 || Timer: 0.0002 sec.
===> Epoch[146](257/521): Loss: 0.0064 || Timer: 0.0003 sec.
===> Epoch[146](258/521): Loss: 0.0219 || Timer: 0.0002 sec.
===> Epoch[146](259/521): Loss: 0.0107 || Timer: 0.0002 sec.
===> Epoch[146](260/521): Loss: 0.0070 || Timer: 0.0002 sec.
===> Epoch[146](261/521): Loss: 0.0118 || Timer: 0.0002 sec.
===> Epoch[146](262/521): Loss: 0.0120 || Timer: 0.0002 sec.
===> Epoch[146](263/521): Loss: 0.0060 || Timer: 0.0002 sec.
===> Epoch[146](264/521): Loss: 0.0054 || Timer: 0.0002 sec.
===> Epoch[146](265/521): Loss: 0.0088 || Timer: 0.0002 sec.
===> Epoch[146](266/521)

===> Epoch[146](385/521): Loss: 0.0056 || Timer: 0.0002 sec.
===> Epoch[146](386/521): Loss: 0.0089 || Timer: 0.0002 sec.
===> Epoch[146](387/521): Loss: 0.0256 || Timer: 0.0002 sec.
===> Epoch[146](388/521): Loss: 0.0105 || Timer: 0.0002 sec.
===> Epoch[146](389/521): Loss: 0.0295 || Timer: 0.0002 sec.
===> Epoch[146](390/521): Loss: 0.0174 || Timer: 0.0002 sec.
===> Epoch[146](391/521): Loss: 0.0148 || Timer: 0.0002 sec.
===> Epoch[146](392/521): Loss: 0.0154 || Timer: 0.0002 sec.
===> Epoch[146](393/521): Loss: 0.0110 || Timer: 0.0002 sec.
===> Epoch[146](394/521): Loss: 0.0072 || Timer: 0.0002 sec.
===> Epoch[146](395/521): Loss: 0.0064 || Timer: 0.0002 sec.
===> Epoch[146](396/521): Loss: 0.0059 || Timer: 0.0002 sec.
===> Epoch[146](397/521): Loss: 0.0076 || Timer: 0.0001 sec.
===> Epoch[146](398/521): Loss: 0.0064 || Timer: 0.0002 sec.
===> Epoch[146](399/521): Loss: 0.0126 || Timer: 0.0002 sec.
===> Epoch[146](400/521): Loss: 0.0077 || Timer: 0.0005 sec.
===> Epoch[146](401/521)

===> Epoch[146](520/521): Loss: 0.0134 || Timer: 0.0002 sec.
===> Epoch 146 Complete: Avg. Loss: 0.0131 | Time: 0.3825
[Epoch 146] Best accuracy: 0.8087
===> Epoch[147](0/521): Loss: 0.0096 || Timer: 0.0002 sec.
===> Epoch[147](1/521): Loss: 0.0047 || Timer: 0.0002 sec.
===> Epoch[147](2/521): Loss: 0.0084 || Timer: 0.0002 sec.
===> Epoch[147](3/521): Loss: 0.0097 || Timer: 0.0002 sec.
===> Epoch[147](4/521): Loss: 0.0113 || Timer: 0.0002 sec.
===> Epoch[147](5/521): Loss: 0.0135 || Timer: 0.0002 sec.
===> Epoch[147](6/521): Loss: 0.0074 || Timer: 0.0002 sec.
===> Epoch[147](7/521): Loss: 0.0092 || Timer: 0.0002 sec.
===> Epoch[147](8/521): Loss: 0.0088 || Timer: 0.0001 sec.
===> Epoch[147](9/521): Loss: 0.0076 || Timer: 0.0002 sec.
===> Epoch[147](10/521): Loss: 0.0092 || Timer: 0.0002 sec.
===> Epoch[147](11/521): Loss: 0.0080 || Timer: 0.0003 sec.
===> Epoch[147](12/521): Loss: 0.0090 || Timer: 0.0002 sec.
===> Epoch[147](13/521): Loss: 0.0114 || Timer: 0.0002 sec.
===> Epoch[147](1

===> Epoch[147](134/521): Loss: 0.0103 || Timer: 0.0002 sec.
===> Epoch[147](135/521): Loss: 0.0083 || Timer: 0.0002 sec.
===> Epoch[147](136/521): Loss: 0.0191 || Timer: 0.0002 sec.
===> Epoch[147](137/521): Loss: 0.0088 || Timer: 0.0002 sec.
===> Epoch[147](138/521): Loss: 0.0068 || Timer: 0.0002 sec.
===> Epoch[147](139/521): Loss: 0.0065 || Timer: 0.0002 sec.
===> Epoch[147](140/521): Loss: 0.0096 || Timer: 0.0002 sec.
===> Epoch[147](141/521): Loss: 0.0064 || Timer: 0.0002 sec.
===> Epoch[147](142/521): Loss: 0.0108 || Timer: 0.0002 sec.
===> Epoch[147](143/521): Loss: 0.0136 || Timer: 0.0003 sec.
===> Epoch[147](144/521): Loss: 0.0100 || Timer: 0.0002 sec.
===> Epoch[147](145/521): Loss: 0.0094 || Timer: 0.0003 sec.
===> Epoch[147](146/521): Loss: 0.0087 || Timer: 0.0002 sec.
===> Epoch[147](147/521): Loss: 0.0130 || Timer: 0.0003 sec.
===> Epoch[147](148/521): Loss: 0.0107 || Timer: 0.0002 sec.
===> Epoch[147](149/521): Loss: 0.0069 || Timer: 0.0003 sec.
===> Epoch[147](150/521)

===> Epoch[147](269/521): Loss: 0.0078 || Timer: 0.0002 sec.
===> Epoch[147](270/521): Loss: 0.0069 || Timer: 0.0002 sec.
===> Epoch[147](271/521): Loss: 0.0079 || Timer: 0.0002 sec.
===> Epoch[147](272/521): Loss: 0.0204 || Timer: 0.0002 sec.
===> Epoch[147](273/521): Loss: 0.0092 || Timer: 0.0002 sec.
===> Epoch[147](274/521): Loss: 0.0054 || Timer: 0.0002 sec.
===> Epoch[147](275/521): Loss: 0.0103 || Timer: 0.0001 sec.
===> Epoch[147](276/521): Loss: 0.0111 || Timer: 0.0002 sec.
===> Epoch[147](277/521): Loss: 0.0104 || Timer: 0.0002 sec.
===> Epoch[147](278/521): Loss: 0.0188 || Timer: 0.0002 sec.
===> Epoch[147](279/521): Loss: 0.0055 || Timer: 0.0002 sec.
===> Epoch[147](280/521): Loss: 0.0083 || Timer: 0.0002 sec.
===> Epoch[147](281/521): Loss: 0.0065 || Timer: 0.0002 sec.
===> Epoch[147](282/521): Loss: 0.0129 || Timer: 0.0002 sec.
===> Epoch[147](283/521): Loss: 0.0111 || Timer: 0.0002 sec.
===> Epoch[147](284/521): Loss: 0.0092 || Timer: 0.0002 sec.
===> Epoch[147](285/521)

===> Epoch[147](404/521): Loss: 0.0155 || Timer: 0.0001 sec.
===> Epoch[147](405/521): Loss: 0.0061 || Timer: 0.0002 sec.
===> Epoch[147](406/521): Loss: 0.0095 || Timer: 0.0001 sec.
===> Epoch[147](407/521): Loss: 0.0090 || Timer: 0.0002 sec.
===> Epoch[147](408/521): Loss: 0.0082 || Timer: 0.0001 sec.
===> Epoch[147](409/521): Loss: 0.0082 || Timer: 0.0002 sec.
===> Epoch[147](410/521): Loss: 0.0211 || Timer: 0.0002 sec.
===> Epoch[147](411/521): Loss: 0.0065 || Timer: 0.0002 sec.
===> Epoch[147](412/521): Loss: 0.0085 || Timer: 0.0002 sec.
===> Epoch[147](413/521): Loss: 0.0111 || Timer: 0.0002 sec.
===> Epoch[147](414/521): Loss: 0.0151 || Timer: 0.0002 sec.
===> Epoch[147](415/521): Loss: 0.0144 || Timer: 0.0002 sec.
===> Epoch[147](416/521): Loss: 0.0059 || Timer: 0.0002 sec.
===> Epoch[147](417/521): Loss: 0.0157 || Timer: 0.0002 sec.
===> Epoch[147](418/521): Loss: 0.0103 || Timer: 0.0002 sec.
===> Epoch[147](419/521): Loss: 0.0065 || Timer: 0.0002 sec.
===> Epoch[147](420/521)

===> Epoch[148](17/521): Loss: 0.0083 || Timer: 0.0003 sec.
===> Epoch[148](18/521): Loss: 0.0078 || Timer: 0.0003 sec.
===> Epoch[148](19/521): Loss: 0.0076 || Timer: 0.0003 sec.
===> Epoch[148](20/521): Loss: 0.0238 || Timer: 0.0003 sec.
===> Epoch[148](21/521): Loss: 0.0106 || Timer: 0.0003 sec.
===> Epoch[148](22/521): Loss: 0.0052 || Timer: 0.0003 sec.
===> Epoch[148](23/521): Loss: 0.0058 || Timer: 0.0003 sec.
===> Epoch[148](24/521): Loss: 0.0065 || Timer: 0.0003 sec.
===> Epoch[148](25/521): Loss: 0.0159 || Timer: 0.0003 sec.
===> Epoch[148](26/521): Loss: 0.0068 || Timer: 0.0003 sec.
===> Epoch[148](27/521): Loss: 0.0095 || Timer: 0.0003 sec.
===> Epoch[148](28/521): Loss: 0.0103 || Timer: 0.0002 sec.
===> Epoch[148](29/521): Loss: 0.0060 || Timer: 0.0003 sec.
===> Epoch[148](30/521): Loss: 0.0091 || Timer: 0.0003 sec.
===> Epoch[148](31/521): Loss: 0.0111 || Timer: 0.0004 sec.
===> Epoch[148](32/521): Loss: 0.0081 || Timer: 0.0004 sec.
===> Epoch[148](33/521): Loss: 0.0092 ||

===> Epoch[148](153/521): Loss: 0.0075 || Timer: 0.0002 sec.
===> Epoch[148](154/521): Loss: 0.0139 || Timer: 0.0003 sec.
===> Epoch[148](155/521): Loss: 0.0120 || Timer: 0.0002 sec.
===> Epoch[148](156/521): Loss: 0.0095 || Timer: 0.0003 sec.
===> Epoch[148](157/521): Loss: 0.0076 || Timer: 0.0003 sec.
===> Epoch[148](158/521): Loss: 0.0085 || Timer: 0.0003 sec.
===> Epoch[148](159/521): Loss: 0.0077 || Timer: 0.0002 sec.
===> Epoch[148](160/521): Loss: 0.0062 || Timer: 0.0003 sec.
===> Epoch[148](161/521): Loss: 0.0080 || Timer: 0.0004 sec.
===> Epoch[148](162/521): Loss: 0.0090 || Timer: 0.0002 sec.
===> Epoch[148](163/521): Loss: 0.0090 || Timer: 0.0003 sec.
===> Epoch[148](164/521): Loss: 0.0136 || Timer: 0.0003 sec.
===> Epoch[148](165/521): Loss: 0.0063 || Timer: 0.0002 sec.
===> Epoch[148](166/521): Loss: 0.0083 || Timer: 0.0002 sec.
===> Epoch[148](167/521): Loss: 0.0047 || Timer: 0.0002 sec.
===> Epoch[148](168/521): Loss: 0.0062 || Timer: 0.0002 sec.
===> Epoch[148](169/521)

===> Epoch[148](288/521): Loss: 0.0063 || Timer: 0.0003 sec.
===> Epoch[148](289/521): Loss: 0.0114 || Timer: 0.0003 sec.
===> Epoch[148](290/521): Loss: 0.0066 || Timer: 0.0003 sec.
===> Epoch[148](291/521): Loss: 0.0094 || Timer: 0.0003 sec.
===> Epoch[148](292/521): Loss: 0.0068 || Timer: 0.0003 sec.
===> Epoch[148](293/521): Loss: 0.0081 || Timer: 0.0004 sec.
===> Epoch[148](294/521): Loss: 0.0123 || Timer: 0.0002 sec.
===> Epoch[148](295/521): Loss: 0.0055 || Timer: 0.0005 sec.
===> Epoch[148](296/521): Loss: 0.0079 || Timer: 0.0002 sec.
===> Epoch[148](297/521): Loss: 0.0095 || Timer: 0.0003 sec.
===> Epoch[148](298/521): Loss: 0.0098 || Timer: 0.0002 sec.
===> Epoch[148](299/521): Loss: 0.0098 || Timer: 0.0004 sec.
===> Epoch[148](300/521): Loss: 0.0087 || Timer: 0.0004 sec.
===> Epoch[148](301/521): Loss: 0.0111 || Timer: 0.0004 sec.
===> Epoch[148](302/521): Loss: 0.0078 || Timer: 0.0002 sec.
===> Epoch[148](303/521): Loss: 0.0126 || Timer: 0.0003 sec.
===> Epoch[148](304/521)

===> Epoch[148](423/521): Loss: 0.0054 || Timer: 0.0002 sec.
===> Epoch[148](424/521): Loss: 0.0068 || Timer: 0.0002 sec.
===> Epoch[148](425/521): Loss: 0.0141 || Timer: 0.0002 sec.
===> Epoch[148](426/521): Loss: 0.0120 || Timer: 0.0002 sec.
===> Epoch[148](427/521): Loss: 0.0115 || Timer: 0.0002 sec.
===> Epoch[148](428/521): Loss: 0.0174 || Timer: 0.0002 sec.
===> Epoch[148](429/521): Loss: 0.0107 || Timer: 0.0002 sec.
===> Epoch[148](430/521): Loss: 0.0091 || Timer: 0.0002 sec.
===> Epoch[148](431/521): Loss: 0.0079 || Timer: 0.0002 sec.
===> Epoch[148](432/521): Loss: 0.0192 || Timer: 0.0002 sec.
===> Epoch[148](433/521): Loss: 0.0090 || Timer: 0.0002 sec.
===> Epoch[148](434/521): Loss: 0.0088 || Timer: 0.0002 sec.
===> Epoch[148](435/521): Loss: 0.0106 || Timer: 0.0002 sec.
===> Epoch[148](436/521): Loss: 0.0112 || Timer: 0.0002 sec.
===> Epoch[148](437/521): Loss: 0.0095 || Timer: 0.0002 sec.
===> Epoch[148](438/521): Loss: 0.0102 || Timer: 0.0002 sec.
===> Epoch[148](439/521)

===> Epoch[149](36/521): Loss: 0.0112 || Timer: 0.0002 sec.
===> Epoch[149](37/521): Loss: 0.0086 || Timer: 0.0002 sec.
===> Epoch[149](38/521): Loss: 0.0073 || Timer: 0.0001 sec.
===> Epoch[149](39/521): Loss: 0.0158 || Timer: 0.0001 sec.
===> Epoch[149](40/521): Loss: 0.0139 || Timer: 0.0003 sec.
===> Epoch[149](41/521): Loss: 0.0060 || Timer: 0.0002 sec.
===> Epoch[149](42/521): Loss: 0.0114 || Timer: 0.0002 sec.
===> Epoch[149](43/521): Loss: 0.0057 || Timer: 0.0002 sec.
===> Epoch[149](44/521): Loss: 0.0080 || Timer: 0.0002 sec.
===> Epoch[149](45/521): Loss: 0.0046 || Timer: 0.0002 sec.
===> Epoch[149](46/521): Loss: 0.0133 || Timer: 0.0002 sec.
===> Epoch[149](47/521): Loss: 0.0095 || Timer: 0.0002 sec.
===> Epoch[149](48/521): Loss: 0.0100 || Timer: 0.0002 sec.
===> Epoch[149](49/521): Loss: 0.0054 || Timer: 0.0002 sec.
===> Epoch[149](50/521): Loss: 0.0093 || Timer: 0.0003 sec.
===> Epoch[149](51/521): Loss: 0.0124 || Timer: 0.0002 sec.
===> Epoch[149](52/521): Loss: 0.0085 ||

===> Epoch[149](172/521): Loss: 0.0136 || Timer: 0.0002 sec.
===> Epoch[149](173/521): Loss: 0.0115 || Timer: 0.0002 sec.
===> Epoch[149](174/521): Loss: 0.0088 || Timer: 0.0002 sec.
===> Epoch[149](175/521): Loss: 0.0105 || Timer: 0.0002 sec.
===> Epoch[149](176/521): Loss: 0.0095 || Timer: 0.0002 sec.
===> Epoch[149](177/521): Loss: 0.0093 || Timer: 0.0003 sec.
===> Epoch[149](178/521): Loss: 0.0085 || Timer: 0.0002 sec.
===> Epoch[149](179/521): Loss: 0.0077 || Timer: 0.0002 sec.
===> Epoch[149](180/521): Loss: 0.0096 || Timer: 0.0002 sec.
===> Epoch[149](181/521): Loss: 0.0088 || Timer: 0.0002 sec.
===> Epoch[149](182/521): Loss: 0.0105 || Timer: 0.0002 sec.
===> Epoch[149](183/521): Loss: 0.0107 || Timer: 0.0002 sec.
===> Epoch[149](184/521): Loss: 0.0067 || Timer: 0.0003 sec.
===> Epoch[149](185/521): Loss: 0.0084 || Timer: 0.0002 sec.
===> Epoch[149](186/521): Loss: 0.0156 || Timer: 0.0003 sec.
===> Epoch[149](187/521): Loss: 0.0106 || Timer: 0.0002 sec.
===> Epoch[149](188/521)

===> Epoch[149](307/521): Loss: 0.0077 || Timer: 0.0002 sec.
===> Epoch[149](308/521): Loss: 0.0090 || Timer: 0.0002 sec.
===> Epoch[149](309/521): Loss: 0.0263 || Timer: 0.0002 sec.
===> Epoch[149](310/521): Loss: 0.0099 || Timer: 0.0002 sec.
===> Epoch[149](311/521): Loss: 0.0062 || Timer: 0.0002 sec.
===> Epoch[149](312/521): Loss: 0.0113 || Timer: 0.0002 sec.
===> Epoch[149](313/521): Loss: 0.0073 || Timer: 0.0003 sec.
===> Epoch[149](314/521): Loss: 0.0114 || Timer: 0.0002 sec.
===> Epoch[149](315/521): Loss: 0.0103 || Timer: 0.0002 sec.
===> Epoch[149](316/521): Loss: 0.0113 || Timer: 0.0002 sec.
===> Epoch[149](317/521): Loss: 0.0209 || Timer: 0.0003 sec.
===> Epoch[149](318/521): Loss: 0.0093 || Timer: 0.0002 sec.
===> Epoch[149](319/521): Loss: 0.0094 || Timer: 0.0002 sec.
===> Epoch[149](320/521): Loss: 0.0091 || Timer: 0.0002 sec.
===> Epoch[149](321/521): Loss: 0.0333 || Timer: 0.0002 sec.
===> Epoch[149](322/521): Loss: 0.0136 || Timer: 0.0002 sec.
===> Epoch[149](323/521)

===> Epoch[149](442/521): Loss: 0.0134 || Timer: 0.0003 sec.
===> Epoch[149](443/521): Loss: 0.0122 || Timer: 0.0002 sec.
===> Epoch[149](444/521): Loss: 0.0068 || Timer: 0.0003 sec.
===> Epoch[149](445/521): Loss: 0.0104 || Timer: 0.0002 sec.
===> Epoch[149](446/521): Loss: 0.0072 || Timer: 0.0003 sec.
===> Epoch[149](447/521): Loss: 0.0054 || Timer: 0.0002 sec.
===> Epoch[149](448/521): Loss: 0.0148 || Timer: 0.0002 sec.
===> Epoch[149](449/521): Loss: 0.0099 || Timer: 0.0002 sec.
===> Epoch[149](450/521): Loss: 0.0064 || Timer: 0.0002 sec.
===> Epoch[149](451/521): Loss: 0.0214 || Timer: 0.0002 sec.
===> Epoch[149](452/521): Loss: 0.0128 || Timer: 0.0002 sec.
===> Epoch[149](453/521): Loss: 0.0060 || Timer: 0.0003 sec.
===> Epoch[149](454/521): Loss: 0.0073 || Timer: 0.0003 sec.
===> Epoch[149](455/521): Loss: 0.0063 || Timer: 0.0003 sec.
===> Epoch[149](456/521): Loss: 0.0130 || Timer: 0.0003 sec.
===> Epoch[149](457/521): Loss: 0.0079 || Timer: 0.0003 sec.
===> Epoch[149](458/521)