在激活函数前后加入BatchNorm层
===

In [2]:
import os
import time

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.data import Subset

from torchvision import datasets
from torchvision import transforms

import matplotlib.pyplot as plt
from PIL import Image

if torch.cuda.is_available():
    torch.backends.cudnn.deterministic = True

# 1.全局设置

In [3]:
RANDOM_SEED = 1
LEARNING_RATE = 0.0005
BATCH_SIZE = 256
NUM_EPOCHS = 100

# Architecture
NUM_CLASSES = 10

# Other
DEVICE = "cuda:0"
GRAYSCALE = False

# 2.导入数据

In [4]:
train_indices = torch.arange(0, 49000)
valid_indices = torch.arange(49000, 50000)


train_and_valid = datasets.CIFAR10(root='/input/', 
                                   train=True, 
                                   transform=transforms.ToTensor(),
                                   download=True)

train_dataset = Subset(train_and_valid, train_indices)
valid_dataset = Subset(train_and_valid, valid_indices)


test_dataset = datasets.CIFAR10(root='/input/', 
                                train=False, 
                                transform=transforms.ToTensor())


#####################################################
### Data Loaders
#####################################################

train_loader = DataLoader(dataset=train_dataset, 
                          batch_size=BATCH_SIZE,
                          num_workers=8,
                          shuffle=True)

valid_loader = DataLoader(dataset=valid_dataset, 
                          batch_size=BATCH_SIZE,
                          num_workers=8,
                          shuffle=False)

test_loader = DataLoader(dataset=test_dataset, 
                         batch_size=BATCH_SIZE,
                         num_workers=8,
                         shuffle=False)

Files already downloaded and verified


# 3.模型与训练

## 3.1.不加入BatchNorm层

In [5]:
class NiN(nn.Module):
    def __init__(self, num_classes):
        super(NiN, self).__init__()
        self.num_classes = num_classes
        self.classifier = nn.Sequential(
                nn.Conv2d(3, 192, kernel_size=5, stride=1, padding=2),
                nn.ReLU(inplace=True),
                nn.Conv2d(192, 160, kernel_size=1, stride=1, padding=0),
                nn.ReLU(inplace=True),
                nn.Conv2d(160,  96, kernel_size=1, stride=1, padding=0),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
                nn.Dropout(0.5),

                nn.Conv2d(96, 192, kernel_size=5, stride=1, padding=2),
                nn.ReLU(inplace=True),
                nn.Conv2d(192, 192, kernel_size=1, stride=1, padding=0),
                nn.ReLU(inplace=True),
                nn.Conv2d(192, 192, kernel_size=1, stride=1, padding=0),
                nn.ReLU(inplace=True),
                nn.AvgPool2d(kernel_size=3, stride=2, padding=1),
                nn.Dropout(0.5),

                nn.Conv2d(192, 192, kernel_size=3, stride=1, padding=1),
                nn.ReLU(inplace=True),
                nn.Conv2d(192, 192, kernel_size=1, stride=1, padding=0),
                nn.ReLU(inplace=True),
                nn.Conv2d(192,  10, kernel_size=1, stride=1, padding=0),
                nn.ReLU(inplace=True),
                nn.AvgPool2d(kernel_size=8, stride=1, padding=0),

                )

    def forward(self, x):
        x = self.classifier(x)
        logits = x.view(x.size(0), self.num_classes)
        probas = torch.softmax(logits, dim=1)
        return logits, probas

In [6]:
torch.manual_seed(RANDOM_SEED)

model = NiN(NUM_CLASSES)
model.to(DEVICE)

optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [7]:
def compute_accuracy(model, data_loader, device):
    correct_pred, num_examples = 0, 0
    for i, (features, targets) in enumerate(data_loader):
            
        features = features.to(device)
        targets = targets.to(device)

        logits, probas = model(features)
        _, predicted_labels = torch.max(probas, 1)
        num_examples += targets.size(0)
        correct_pred += (predicted_labels == targets).sum()
    return correct_pred.float()/num_examples * 100
    

start_time = time.time()
for epoch in range(NUM_EPOCHS):
    
    model.train()
    
    for batch_idx, (features, targets) in enumerate(train_loader):
    
        ### PREPARE MINIBATCH
        features = features.to(DEVICE)
        targets = targets.to(DEVICE)
            
        ### FORWARD AND BACK PROP
        logits, probas = model(features)
        cost = F.cross_entropy(logits, targets)
        optimizer.zero_grad()
        
        cost.backward()
        
        ### UPDATE MODEL PARAMETERS
        optimizer.step()
        
        ### LOGGING
        if not batch_idx % 120:
            print (f'Epoch: {epoch+1:03d}/{NUM_EPOCHS:03d} | '
                   f'Batch {batch_idx:03d}/{len(train_loader):03d} |' 
                   f' Cost: {cost:.4f}')

    # no need to build the computation graph for backprop when computing accuracy
    with torch.set_grad_enabled(False):
        train_acc = compute_accuracy(model, train_loader, device=DEVICE)
        valid_acc = compute_accuracy(model, valid_loader, device=DEVICE)
        print(f'Epoch: {epoch+1:03d}/{NUM_EPOCHS:03d} Train Acc.: {train_acc:.2f}%'
              f' | Validation Acc.: {valid_acc:.2f}%')
        
    elapsed = (time.time() - start_time)/60
    print(f'Time elapsed: {elapsed:.2f} min')
  
elapsed = (time.time() - start_time)/60
print(f'Total Training Time: {elapsed:.2f} min')

Epoch: 001/100 | Batch 000/192 | Cost: 2.3043
Epoch: 001/100 | Batch 120/192 | Cost: 2.0062
Epoch: 001/100 Train Acc.: 26.97% | Validation Acc.: 28.60%
Time elapsed: 0.35 min
Epoch: 002/100 | Batch 000/192 | Cost: 1.8996
Epoch: 002/100 | Batch 120/192 | Cost: 1.8640
Epoch: 002/100 Train Acc.: 33.31% | Validation Acc.: 32.80%
Time elapsed: 0.69 min
Epoch: 003/100 | Batch 000/192 | Cost: 1.7248
Epoch: 003/100 | Batch 120/192 | Cost: 1.7386
Epoch: 003/100 Train Acc.: 36.32% | Validation Acc.: 36.00%
Time elapsed: 1.02 min
Epoch: 004/100 | Batch 000/192 | Cost: 1.7034
Epoch: 004/100 | Batch 120/192 | Cost: 1.5707
Epoch: 004/100 Train Acc.: 40.21% | Validation Acc.: 42.70%
Time elapsed: 1.36 min
Epoch: 005/100 | Batch 000/192 | Cost: 1.6372
Epoch: 005/100 | Batch 120/192 | Cost: 1.6354
Epoch: 005/100 Train Acc.: 43.71% | Validation Acc.: 45.90%
Time elapsed: 1.70 min
Epoch: 006/100 | Batch 000/192 | Cost: 1.6369
Epoch: 006/100 | Batch 120/192 | Cost: 1.4114
Epoch: 006/100 Train Acc.: 47.08%

Epoch: 048/100 | Batch 000/192 | Cost: 0.6867
Epoch: 048/100 | Batch 120/192 | Cost: 0.7413
Epoch: 048/100 Train Acc.: 75.81% | Validation Acc.: 70.90%
Time elapsed: 16.20 min
Epoch: 049/100 | Batch 000/192 | Cost: 0.6400
Epoch: 049/100 | Batch 120/192 | Cost: 0.6616
Epoch: 049/100 Train Acc.: 77.20% | Validation Acc.: 71.60%
Time elapsed: 16.53 min
Epoch: 050/100 | Batch 000/192 | Cost: 0.5869
Epoch: 050/100 | Batch 120/192 | Cost: 0.7880
Epoch: 050/100 Train Acc.: 76.54% | Validation Acc.: 73.80%
Time elapsed: 16.87 min
Epoch: 051/100 | Batch 000/192 | Cost: 0.6346
Epoch: 051/100 | Batch 120/192 | Cost: 0.7546
Epoch: 051/100 Train Acc.: 76.56% | Validation Acc.: 71.40%
Time elapsed: 17.21 min
Epoch: 052/100 | Batch 000/192 | Cost: 0.7384
Epoch: 052/100 | Batch 120/192 | Cost: 0.6931
Epoch: 052/100 Train Acc.: 78.14% | Validation Acc.: 72.40%
Time elapsed: 17.54 min
Epoch: 053/100 | Batch 000/192 | Cost: 0.6469
Epoch: 053/100 | Batch 120/192 | Cost: 0.5473
Epoch: 053/100 Train Acc.: 7

Epoch: 095/100 | Batch 000/192 | Cost: 0.5056
Epoch: 095/100 | Batch 120/192 | Cost: 0.3520
Epoch: 095/100 Train Acc.: 84.24% | Validation Acc.: 74.90%
Time elapsed: 32.59 min
Epoch: 096/100 | Batch 000/192 | Cost: 0.4989
Epoch: 096/100 | Batch 120/192 | Cost: 0.4355
Epoch: 096/100 Train Acc.: 83.62% | Validation Acc.: 73.20%
Time elapsed: 32.94 min
Epoch: 097/100 | Batch 000/192 | Cost: 0.4089
Epoch: 097/100 | Batch 120/192 | Cost: 0.3521
Epoch: 097/100 Train Acc.: 83.88% | Validation Acc.: 73.70%
Time elapsed: 33.29 min
Epoch: 098/100 | Batch 000/192 | Cost: 0.4364
Epoch: 098/100 | Batch 120/192 | Cost: 0.4730
Epoch: 098/100 Train Acc.: 84.07% | Validation Acc.: 73.60%
Time elapsed: 33.64 min
Epoch: 099/100 | Batch 000/192 | Cost: 0.5375
Epoch: 099/100 | Batch 120/192 | Cost: 0.4485
Epoch: 099/100 Train Acc.: 84.07% | Validation Acc.: 72.80%
Time elapsed: 33.99 min
Epoch: 100/100 | Batch 000/192 | Cost: 0.4016
Epoch: 100/100 | Batch 120/192 | Cost: 0.5793
Epoch: 100/100 Train Acc.: 8

# 3.2.在激活函数前加入BatchNorm

In [8]:
class NiN(nn.Module):
    def __init__(self, num_classes):
        super(NiN, self).__init__()
        self.num_classes = num_classes
        self.classifier = nn.Sequential(
                nn.Conv2d(3, 192, kernel_size=5, stride=1, padding=2, bias=False),
                nn.BatchNorm2d(192),
                nn.ReLU(inplace=True),
                nn.Conv2d(192, 160, kernel_size=1, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(160),
                nn.ReLU(inplace=True),
                nn.Conv2d(160,  96, kernel_size=1, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(96),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
                nn.Dropout(0.5),

                nn.Conv2d(96, 192, kernel_size=5, stride=1, padding=2, bias=False),
                nn.BatchNorm2d(192),
                nn.ReLU(inplace=True),
                nn.Conv2d(192, 192, kernel_size=1, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(192),
                nn.ReLU(inplace=True),
                nn.Conv2d(192, 192, kernel_size=1, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(192),
                nn.ReLU(inplace=True),
                nn.AvgPool2d(kernel_size=3, stride=2, padding=1),
                nn.Dropout(0.5),

                nn.Conv2d(192, 192, kernel_size=3, stride=1, padding=1, bias=False),
                nn.BatchNorm2d(192),
                nn.ReLU(inplace=True),
                nn.Conv2d(192, 192, kernel_size=1, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(192),
                nn.ReLU(inplace=True),
                nn.Conv2d(192,  10, kernel_size=1, stride=1, padding=0),
                nn.ReLU(inplace=True),
                nn.AvgPool2d(kernel_size=8, stride=1, padding=0),

                )

    def forward(self, x):
        x = self.classifier(x)
        logits = x.view(x.size(0), self.num_classes)
        probas = torch.softmax(logits, dim=1)
        return logits, probas

In [9]:
torch.manual_seed(RANDOM_SEED)

model = NiN(NUM_CLASSES)
model.to(DEVICE)

optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)  

In [10]:
start_time = time.time()
for epoch in range(NUM_EPOCHS):
    
    model.train()
    
    for batch_idx, (features, targets) in enumerate(train_loader):
    
        ### PREPARE MINIBATCH
        features = features.to(DEVICE)
        targets = targets.to(DEVICE)
            
        ### FORWARD AND BACK PROP
        logits, probas = model(features)
        cost = F.cross_entropy(logits, targets)
        optimizer.zero_grad()
        
        cost.backward()
        
        ### UPDATE MODEL PARAMETERS
        optimizer.step()
        
        ### LOGGING
        if not batch_idx % 120:
            print (f'Epoch: {epoch+1:03d}/{NUM_EPOCHS:03d} | '
                   f'Batch {batch_idx:03d}/{len(train_loader):03d} |' 
                   f' Cost: {cost:.4f}')

    # no need to build the computation graph for backprop when computing accuracy
    with torch.set_grad_enabled(False):
        train_acc = compute_accuracy(model, train_loader, device=DEVICE)
        valid_acc = compute_accuracy(model, valid_loader, device=DEVICE)
        print(f'Epoch: {epoch+1:03d}/{NUM_EPOCHS:03d} Train Acc.: {train_acc:.2f}%'
              f' | Validation Acc.: {valid_acc:.2f}%')
        
    elapsed = (time.time() - start_time)/60
    print(f'Time elapsed: {elapsed:.2f} min')
  
elapsed = (time.time() - start_time)/60
print(f'Total Training Time: {elapsed:.2f} min')

Epoch: 001/100 | Batch 000/192 | Cost: 2.3148
Epoch: 001/100 | Batch 120/192 | Cost: 1.4051
Epoch: 001/100 Train Acc.: 58.11% | Validation Acc.: 58.50%
Time elapsed: 0.38 min
Epoch: 002/100 | Batch 000/192 | Cost: 1.1883
Epoch: 002/100 | Batch 120/192 | Cost: 1.0690
Epoch: 002/100 Train Acc.: 66.62% | Validation Acc.: 65.40%
Time elapsed: 0.75 min
Epoch: 003/100 | Batch 000/192 | Cost: 0.9535
Epoch: 003/100 | Batch 120/192 | Cost: 0.8616
Epoch: 003/100 Train Acc.: 70.90% | Validation Acc.: 69.90%
Time elapsed: 1.13 min
Epoch: 004/100 | Batch 000/192 | Cost: 0.9257
Epoch: 004/100 | Batch 120/192 | Cost: 0.7893
Epoch: 004/100 Train Acc.: 74.34% | Validation Acc.: 74.00%
Time elapsed: 1.51 min
Epoch: 005/100 | Batch 000/192 | Cost: 0.7172
Epoch: 005/100 | Batch 120/192 | Cost: 0.7068
Epoch: 005/100 Train Acc.: 76.68% | Validation Acc.: 75.20%
Time elapsed: 1.88 min
Epoch: 006/100 | Batch 000/192 | Cost: 0.7002
Epoch: 006/100 | Batch 120/192 | Cost: 0.5935
Epoch: 006/100 Train Acc.: 79.11%

Epoch: 048/100 | Batch 000/192 | Cost: 0.0755
Epoch: 048/100 | Batch 120/192 | Cost: 0.1599
Epoch: 048/100 Train Acc.: 96.39% | Validation Acc.: 85.40%
Time elapsed: 18.11 min
Epoch: 049/100 | Batch 000/192 | Cost: 0.1242
Epoch: 049/100 | Batch 120/192 | Cost: 0.1270
Epoch: 049/100 Train Acc.: 96.21% | Validation Acc.: 83.40%
Time elapsed: 18.49 min
Epoch: 050/100 | Batch 000/192 | Cost: 0.1395
Epoch: 050/100 | Batch 120/192 | Cost: 0.0981
Epoch: 050/100 Train Acc.: 96.66% | Validation Acc.: 85.00%
Time elapsed: 18.86 min
Epoch: 051/100 | Batch 000/192 | Cost: 0.0861
Epoch: 051/100 | Batch 120/192 | Cost: 0.0734
Epoch: 051/100 Train Acc.: 96.63% | Validation Acc.: 83.90%
Time elapsed: 19.24 min
Epoch: 052/100 | Batch 000/192 | Cost: 0.0867
Epoch: 052/100 | Batch 120/192 | Cost: 0.1450
Epoch: 052/100 Train Acc.: 96.70% | Validation Acc.: 84.90%
Time elapsed: 19.62 min
Epoch: 053/100 | Batch 000/192 | Cost: 0.1015
Epoch: 053/100 | Batch 120/192 | Cost: 0.0595
Epoch: 053/100 Train Acc.: 9

Epoch: 095/100 | Batch 000/192 | Cost: 0.0541
Epoch: 095/100 | Batch 120/192 | Cost: 0.0459
Epoch: 095/100 Train Acc.: 98.22% | Validation Acc.: 85.60%
Time elapsed: 35.83 min
Epoch: 096/100 | Batch 000/192 | Cost: 0.0590
Epoch: 096/100 | Batch 120/192 | Cost: 0.0800
Epoch: 096/100 Train Acc.: 98.03% | Validation Acc.: 85.50%
Time elapsed: 36.21 min
Epoch: 097/100 | Batch 000/192 | Cost: 0.0338
Epoch: 097/100 | Batch 120/192 | Cost: 0.0769
Epoch: 097/100 Train Acc.: 98.19% | Validation Acc.: 85.80%
Time elapsed: 36.59 min
Epoch: 098/100 | Batch 000/192 | Cost: 0.0506
Epoch: 098/100 | Batch 120/192 | Cost: 0.0550
Epoch: 098/100 Train Acc.: 98.07% | Validation Acc.: 86.00%
Time elapsed: 36.96 min
Epoch: 099/100 | Batch 000/192 | Cost: 0.0348
Epoch: 099/100 | Batch 120/192 | Cost: 0.0643
Epoch: 099/100 Train Acc.: 98.17% | Validation Acc.: 86.40%
Time elapsed: 37.34 min
Epoch: 100/100 | Batch 000/192 | Cost: 0.0334
Epoch: 100/100 | Batch 120/192 | Cost: 0.0745
Epoch: 100/100 Train Acc.: 9

## 3.3.在激活函数后加入BatchNorm

In [11]:
class NiN(nn.Module):
    def __init__(self, num_classes):
        super(NiN, self).__init__()
        self.num_classes = num_classes
        self.classifier = nn.Sequential(
                nn.Conv2d(3, 192, kernel_size=5, stride=1, padding=2, bias=False),
                nn.ReLU(inplace=True),
                nn.BatchNorm2d(192),
                nn.Conv2d(192, 160, kernel_size=1, stride=1, padding=0, bias=False),
                nn.ReLU(inplace=True),
                nn.BatchNorm2d(160),
                nn.Conv2d(160,  96, kernel_size=1, stride=1, padding=0, bias=False),
                nn.ReLU(inplace=True),
                nn.BatchNorm2d(96),
                nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
                nn.Dropout(0.5),

                nn.Conv2d(96, 192, kernel_size=5, stride=1, padding=2, bias=False),
                nn.ReLU(inplace=True),
                nn.BatchNorm2d(192),
                nn.Conv2d(192, 192, kernel_size=1, stride=1, padding=0, bias=False),
                nn.ReLU(inplace=True),
                nn.BatchNorm2d(192),
                nn.Conv2d(192, 192, kernel_size=1, stride=1, padding=0, bias=False),
                nn.ReLU(inplace=True),
                nn.BatchNorm2d(192),
                nn.AvgPool2d(kernel_size=3, stride=2, padding=1),
                nn.Dropout(0.5),

                nn.Conv2d(192, 192, kernel_size=3, stride=1, padding=1, bias=False),
                nn.ReLU(inplace=True),
                nn.BatchNorm2d(192),
                nn.Conv2d(192, 192, kernel_size=1, stride=1, padding=0, bias=False),
                nn.ReLU(inplace=True),
                nn.BatchNorm2d(192),
                nn.Conv2d(192,  10, kernel_size=1, stride=1, padding=0),
                nn.ReLU(inplace=True),
                nn.AvgPool2d(kernel_size=8, stride=1, padding=0),

                )

    def forward(self, x):
        x = self.classifier(x)
        logits = x.view(x.size(0), self.num_classes)
        probas = torch.softmax(logits, dim=1)
        return logits, probas

In [12]:
torch.manual_seed(RANDOM_SEED)

model = NiN(NUM_CLASSES)
model.to(DEVICE)

optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)  

In [13]:
start_time = time.time()
for epoch in range(NUM_EPOCHS):
    
    model.train()
    
    for batch_idx, (features, targets) in enumerate(train_loader):
    
        ### PREPARE MINIBATCH
        features = features.to(DEVICE)
        targets = targets.to(DEVICE)
            
        ### FORWARD AND BACK PROP
        logits, probas = model(features)
        cost = F.cross_entropy(logits, targets)
        optimizer.zero_grad()
        
        cost.backward()
        
        ### UPDATE MODEL PARAMETERS
        optimizer.step()
        
        ### LOGGING
        if not batch_idx % 120:
            print (f'Epoch: {epoch+1:03d}/{NUM_EPOCHS:03d} | '
                   f'Batch {batch_idx:03d}/{len(train_loader):03d} |' 
                   f' Cost: {cost:.4f}')

    # no need to build the computation graph for backprop when computing accuracy
    with torch.set_grad_enabled(False):
        train_acc = compute_accuracy(model, train_loader, device=DEVICE)
        valid_acc = compute_accuracy(model, valid_loader, device=DEVICE)
        print(f'Epoch: {epoch+1:03d}/{NUM_EPOCHS:03d} Train Acc.: {train_acc:.2f}%'
              f' | Validation Acc.: {valid_acc:.2f}%')
        
    elapsed = (time.time() - start_time)/60
    print(f'Time elapsed: {elapsed:.2f} min')
  
elapsed = (time.time() - start_time)/60
print(f'Total Training Time: {elapsed:.2f} min')

Epoch: 001/100 | Batch 000/192 | Cost: 2.3156
Epoch: 001/100 | Batch 120/192 | Cost: 1.3044
Epoch: 001/100 Train Acc.: 62.11% | Validation Acc.: 62.60%
Time elapsed: 0.37 min
Epoch: 002/100 | Batch 000/192 | Cost: 1.0337
Epoch: 002/100 | Batch 120/192 | Cost: 1.0200
Epoch: 002/100 Train Acc.: 69.12% | Validation Acc.: 68.20%
Time elapsed: 0.75 min
Epoch: 003/100 | Batch 000/192 | Cost: 0.8587
Epoch: 003/100 | Batch 120/192 | Cost: 0.7816
Epoch: 003/100 Train Acc.: 74.38% | Validation Acc.: 73.30%
Time elapsed: 1.12 min
Epoch: 004/100 | Batch 000/192 | Cost: 0.8238
Epoch: 004/100 | Batch 120/192 | Cost: 0.7435
Epoch: 004/100 Train Acc.: 77.92% | Validation Acc.: 76.80%
Time elapsed: 1.50 min
Epoch: 005/100 | Batch 000/192 | Cost: 0.6268
Epoch: 005/100 | Batch 120/192 | Cost: 0.6444
Epoch: 005/100 Train Acc.: 80.63% | Validation Acc.: 77.50%
Time elapsed: 1.88 min
Epoch: 006/100 | Batch 000/192 | Cost: 0.6143
Epoch: 006/100 | Batch 120/192 | Cost: 0.4961
Epoch: 006/100 Train Acc.: 82.33%

Epoch: 048/100 | Batch 000/192 | Cost: 0.0560
Epoch: 048/100 | Batch 120/192 | Cost: 0.0911
Epoch: 048/100 Train Acc.: 97.07% | Validation Acc.: 82.20%
Time elapsed: 18.06 min
Epoch: 049/100 | Batch 000/192 | Cost: 0.1096
Epoch: 049/100 | Batch 120/192 | Cost: 0.0901
Epoch: 049/100 Train Acc.: 97.28% | Validation Acc.: 83.70%
Time elapsed: 18.43 min
Epoch: 050/100 | Batch 000/192 | Cost: 0.0926
Epoch: 050/100 | Batch 120/192 | Cost: 0.0841
Epoch: 050/100 Train Acc.: 97.19% | Validation Acc.: 84.00%
Time elapsed: 18.81 min
Epoch: 051/100 | Batch 000/192 | Cost: 0.0588
Epoch: 051/100 | Batch 120/192 | Cost: 0.0493
Epoch: 051/100 Train Acc.: 97.56% | Validation Acc.: 84.50%
Time elapsed: 19.19 min
Epoch: 052/100 | Batch 000/192 | Cost: 0.0573
Epoch: 052/100 | Batch 120/192 | Cost: 0.1091
Epoch: 052/100 Train Acc.: 97.53% | Validation Acc.: 84.20%
Time elapsed: 19.57 min
Epoch: 053/100 | Batch 000/192 | Cost: 0.0695
Epoch: 053/100 | Batch 120/192 | Cost: 0.0595
Epoch: 053/100 Train Acc.: 9

Epoch: 095/100 | Batch 000/192 | Cost: 0.0551
Epoch: 095/100 | Batch 120/192 | Cost: 0.0178
Epoch: 095/100 Train Acc.: 98.71% | Validation Acc.: 84.80%
Time elapsed: 35.74 min
Epoch: 096/100 | Batch 000/192 | Cost: 0.0547
Epoch: 096/100 | Batch 120/192 | Cost: 0.0452
Epoch: 096/100 Train Acc.: 98.45% | Validation Acc.: 86.20%
Time elapsed: 36.11 min
Epoch: 097/100 | Batch 000/192 | Cost: 0.0397
Epoch: 097/100 | Batch 120/192 | Cost: 0.0706
Epoch: 097/100 Train Acc.: 98.44% | Validation Acc.: 85.80%
Time elapsed: 36.49 min
Epoch: 098/100 | Batch 000/192 | Cost: 0.0432
Epoch: 098/100 | Batch 120/192 | Cost: 0.0815
Epoch: 098/100 Train Acc.: 98.45% | Validation Acc.: 85.10%
Time elapsed: 36.86 min
Epoch: 099/100 | Batch 000/192 | Cost: 0.0524
Epoch: 099/100 | Batch 120/192 | Cost: 0.0582
Epoch: 099/100 Train Acc.: 98.36% | Validation Acc.: 84.90%
Time elapsed: 37.24 min
Epoch: 100/100 | Batch 000/192 | Cost: 0.0299
Epoch: 100/100 | Batch 120/192 | Cost: 0.0326
Epoch: 100/100 Train Acc.: 9