In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models

In [2]:
import numpy as np
import os
import json
import time
import copy

In [34]:
class ResBlock(nn.Module):
    """
    Resnet block
    """
    def __init__(self, ch_in, ch_out, stride=1):
        """
        :param ch_in
        :param ch_out
        """
        super(ResBlock, self).__init__()
        
        self.conv1 = nn.Conv2d(ch_in, ch_out, kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm2d(ch_out)
        self.conv2 = nn.Conv2d(ch_out, ch_out, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(ch_out)
        
        self.extra = nn.Sequential()
        if (ch_out != ch_in)or(stride!=1):
            #print('ch_out != ch_in')
            # [b, ch_in, h, w] => [b, ch_out, h, w]            
            self.extra = nn.Sequential(
                nn.Conv2d(ch_in, ch_out, kernel_size=1, stride=stride),
                nn.BatchNorm2d(ch_out)
            )
    
    def forward(self, x):
        """
        :param x: [b, ch, h, w]
        :return:
        """
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        # short cut
        # extra module: [b, ch_in, h, w] => [b, ch_out, h, w]
        # element-wise add:
        #print('outshape:',out.shape)
        #print('extrashape:', self.extra(x).shape)
        out = self.extra(x) + out
        out = F.relu(out)
        
        return out
    
class ResNet(nn.Module):
    
    def __init__(self):
        super(ResNet, self).__init__()
        
        # pre-produce layer
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 18, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(18)
        )
        # followed 4 blocks
        ## [b, 64, h, w] => [b, 128, h, w]
        self.blk1 = ResBlock(18, 36, stride=2)
        ## [b, 128, h, w] => [b, 256, h, w]
        self.blk2 = ResBlock(36, 72, stride=2)
        ## [b, 256, h, w] => [b, 512, h, w]
        self.blk3 = ResBlock(72, 144, stride=2)
        ## [b, 512, h, w] => [b, 1024, h, w]
        self.blk4 = ResBlock(144, 144, stride=2)
        
        self.outlayer = nn.Sequential(
            nn.Linear(144,10),
            nn.LogSoftmax(dim=1)
                                     )

    def forward(self, x):
        """
        :param: x:
        :return:
        """
        x = F.relu(self.conv1(x))
        
        ## [b, 64, h, w] => [b, 1024, h, w]
        x = self.blk1(x)
        x = self.blk2(x)
        x = self.blk3(x)
        x = self.blk4(x)
        
        # print('after conv:', x.shape) # [b, 512, 2, 2]
        ## [b, 512, h, w] => [b, 512, 1, 1]
        x = F.adaptive_avg_pool2d(x, [1,1])
        # print('after pool:', x.shape)
        
        x = x.view(x.size(0), -1)  # flatten
        x = self.outlayer(x)
        
        return x

In [35]:
'''blk = ResBlock(64, 128, stride=2)
tmp = torch.randn(2,64,64,64)
out = blk(tmp)
print('block:', out.shape)'''

x = torch.randn(2,3,32,32)
model = ResNet()
out = model(x)
print('Resnet:',out.shape)

Resnet: torch.Size([2, 10])


In [4]:
model_ = ResNet()
model_.parameters

<bound method Module.parameters of ResNet(
  (conv1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (blk1): ResBlock(
    (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (extra): Sequential(
      (0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2))
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (blk2): ResBlock(
    (conv1): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(

In [76]:
_params = list(model_ft.parameters())
k = 0
for i in _params:
    l = 1
    for j in i.size():
        l *= j
    k+=l
k

780562

In [45]:
model_ = ResNet()
model_.parameters

<bound method Module.parameters of ResNet(
  (conv1): Sequential(
    (0): Conv2d(3, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(18, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (blk1): ResBlock(
    (conv1): Conv2d(18, 36, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (bn1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(36, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn2): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (extra): Sequential(
      (0): Conv2d(18, 36, kernel_size=(1, 1), stride=(2, 2))
      (1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (blk2): ResBlock(
    (conv1): Conv2d(36, 72, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (bn1): BatchNorm2d(72, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(72, 72, ke

# Data Preparation

In [5]:
train_on_gpu = torch.cuda.is_available()
device = torch.device('cuda:0' if train_on_gpu else 'cpu')
device

device(type='cpu')

In [39]:
data_dir = '../data/'
train_dir = data_dir + 'train/'
valid_dir = data_dir + 'valid/'

data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((32,32)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485,0.456,0.406],
                             std=[0.229,0.224,0.225])
    ]),
    'valid': transforms.Compose([
        transforms.Resize((32,32)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485,0.456,0.406],
                             std=[0.229,0.224,0.225])
    ])
}

In [40]:
batch_size = 16

image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x]) for x in ['train', 'valid']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=batch_size, shuffle=True) for x in ['train', 'valid']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train','valid']}

class_names = image_datasets['train'].classes

In [41]:
model_ft = ResNet().to(device)
criterion = nn.CrossEntropyLoss().to(device)
#criterion = nn.NLLLoss().to(device)
optimizer_ft = optim.Adam(model_ft.parameters(), lr=1e-3)
scheduler = optim.lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)


In [46]:
def train_model(model, device, dataloaders, criterion, optimizer, num_epochs=25, is_inception=False, filename='outupt1.pth'):
    since = time.time()
    best_acc = 0
    model.to(device)
    #print(model)
    
    # process records
    val_acc_history = []
    train_acc_history = []
    train_losses = []
    valid_losses = []
    LRs = [optimizer.param_groups[0]['lr']]
    
    best_model_wts = copy.deepcopy(model.state_dict())
    
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs-1))
        print('-' *10)
        
        for phase in ['train', 'valid']:
            if phase == 'train':
                print('###training###')
                model.train()
            else:
                print('###validating###')
                model.eval()
            
            running_loss = 0.
            running_correct = 0.
            
            #bb = 0
            for inputs, labels in dataloaders[phase]:
                
                inputs, labels = inputs.to(device), labels.to(device)
 
                with torch.set_grad_enabled(phase=='train'):
                    # outputs: [b, 10]
                    # label: [b]
                    # lodd: tensor scalar
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    if phase=='train':
                        #print('training batch:', bb)
                        #bb+=1
                        # backprop
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                # calculate the loss
                #print('loss:%.3f'%loss.item())
                running_loss += loss.item() * inputs.size(0)
                running_correct += torch.sum(preds==labels.data)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_correct.double() / len(dataloaders[phase].dataset)

            time_elapsed = time.time()- since
            print('Time elapsed {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            if phase=='valid' and epoch_acc>best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                state = {
                    'state_dict': model.state_dict(),
                    'best_acc': best_acc,
                    'optimizer': optimizer.state_dict()
                }
                torch.save(state, filename)
            
            if phase=='valid':
                val_acc_history.append(epoch_acc)
                valid_losses.append(epoch_loss)
                scheduler.step(epoch_loss)
            if phase=='train':
                train_acc_history.append(epoch_acc)
                train_losses.append(epoch_loss)
            
        print('Optimizer learning rate: {:.7f}'.format(optimizer.param_groups[0]['lr']))
        LRs.append(optimizer.param_groups[0]['lr'])
        print()
        
    time_elapsed = time.time()- since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))
    
    model.load_state_dict(best_model_wts)
    return model, val_acc_history, train_acc_history, valid_losses, train_losses, LRs

In [47]:
model1, val_acc_history1, train_acc_history1, valid_losses1, train_losses1, LRs1 = train_model(
                                    model_ft, device, dataloaders, criterion, optimizer_ft, num_epochs=20)

Epoch 0/19
----------
###training###
Time elapsed 2m 32s
train Loss: 0.0750 Acc: 0.9733
###validating###
Time elapsed 2m 37s
valid Loss: 1.0382 Acc: 0.7705
Optimizer learning rate: 0.0010000

Epoch 1/19
----------
###training###




Time elapsed 4m 45s
train Loss: 0.0548 Acc: 0.9819
###validating###
Time elapsed 4m 49s
valid Loss: 0.9974 Acc: 0.7765
Optimizer learning rate: 0.0010000

Epoch 2/19
----------
###training###


KeyboardInterrupt: 

In [49]:
model1, val_acc_history1, train_acc_history1, valid_losses1, train_losses1, LRs1 = train_model(
                                    model_ft, device, dataloaders, criterion, optimizer_ft, num_epochs=2)

Epoch 0/1
----------
###training###
Time elapsed 2m 6s
train Loss: 0.0603 Acc: 0.9796
###validating###
Time elapsed 2m 10s
valid Loss: 1.0598 Acc: 0.7870
Optimizer learning rate: 0.0010000

Epoch 1/1
----------
###training###




Time elapsed 4m 24s
train Loss: 0.0583 Acc: 0.9791
###validating###
Time elapsed 4m 28s
valid Loss: 1.0319 Acc: 0.7884
Optimizer learning rate: 0.0010000

Training complete in 4m 28s
Best val Acc: 0.788365


In [72]:
print("Model's state_dict:")
for param_tensor in model.state_dict():
    print(param_tensor, "\t", model.state_dict()[param_tensor].size())


Model's state_dict:
conv1.0.weight 	 torch.Size([18, 3, 3, 3])
conv1.0.bias 	 torch.Size([18])
conv1.1.weight 	 torch.Size([18])
conv1.1.bias 	 torch.Size([18])
conv1.1.running_mean 	 torch.Size([18])
conv1.1.running_var 	 torch.Size([18])
conv1.1.num_batches_tracked 	 torch.Size([])
blk1.conv1.weight 	 torch.Size([36, 18, 3, 3])
blk1.conv1.bias 	 torch.Size([36])
blk1.bn1.weight 	 torch.Size([36])
blk1.bn1.bias 	 torch.Size([36])
blk1.bn1.running_mean 	 torch.Size([36])
blk1.bn1.running_var 	 torch.Size([36])
blk1.bn1.num_batches_tracked 	 torch.Size([])
blk1.conv2.weight 	 torch.Size([36, 36, 3, 3])
blk1.conv2.bias 	 torch.Size([36])
blk1.bn2.weight 	 torch.Size([36])
blk1.bn2.bias 	 torch.Size([36])
blk1.bn2.running_mean 	 torch.Size([36])
blk1.bn2.running_var 	 torch.Size([36])
blk1.bn2.num_batches_tracked 	 torch.Size([])
blk1.extra.0.weight 	 torch.Size([36, 18, 1, 1])
blk1.extra.0.bias 	 torch.Size([36])
blk1.extra.1.weight 	 torch.Size([36])
blk1.extra.1.bias 	 torch.Size([36])

In [57]:
dataloaders['valid'].dataset[1]

(tensor([[[-2.0837, -2.0494, -2.0152,  ..., -1.9809, -2.0152, -2.0152],
          [-2.0665, -2.0323, -1.9980,  ..., -2.0152, -2.0323, -2.0152],
          [-2.0323, -2.0323, -1.9980,  ..., -2.0152, -2.0323, -2.0494],
          ...,
          [-2.0665, -2.0665, -2.0152,  ..., -2.0323, -2.0323, -2.0323],
          [-2.0494, -2.0323, -2.0323,  ..., -2.0152, -1.9980, -1.9980],
          [-2.0152, -1.9980, -2.0494,  ..., -2.0152, -2.0152, -2.0152]],
 
         [[-2.0007, -1.9657, -1.9307,  ..., -1.8957, -1.9307, -1.9307],
          [-1.9832, -1.9482, -1.9132,  ..., -1.9307, -1.9482, -1.9307],
          [-1.9482, -1.9482, -1.9132,  ..., -1.9307, -1.9482, -1.9657],
          ...,
          [-1.9832, -1.9832, -1.9307,  ..., -1.9482, -1.9482, -1.9482],
          [-1.9657, -1.9482, -1.9482,  ..., -1.9307, -1.9132, -1.9132],
          [-1.9307, -1.9132, -1.9657,  ..., -1.9307, -1.9307, -1.9307]],
 
         [[-1.8044, -1.7696, -1.7347,  ..., -1.6999, -1.7347, -1.7347],
          [-1.7870, -1.7522,

In [50]:
_outputs = model(inputs)
_loss = criterion(outputs, labels)

ResNet(
  (conv1): Sequential(
    (0): Conv2d(3, 18, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(18, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (blk1): ResBlock(
    (conv1): Conv2d(18, 36, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (bn1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(36, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (bn2): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (extra): Sequential(
      (0): Conv2d(18, 36, kernel_size=(1, 1), stride=(2, 2))
      (1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (blk2): ResBlock(
    (conv1): Conv2d(36, 72, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (bn1): BatchNorm2d(72, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(72, 72, kernel_size=(3, 3), stride=(1, 1), pa

In [37]:
model, val_acc_history, train_acc_history, valid_losses, train_losses, LRs = train_model(
                                    model_ft, device, dataloaders, criterion, optimizer_ft, num_epochs=3)

Epoch 0/2
----------
###training###
training batch: 0
loss: 31.094457626342773
training batch: 1
loss: 24.240829467773438
training batch: 2
loss: 23.239065170288086
training batch: 3
loss: 18.242395401000977
training batch: 4
loss: 29.395837783813477
training batch: 5
loss: 22.38224983215332
training batch: 6
loss: 26.478927612304688
training batch: 7
loss: 24.318185806274414
training batch: 8
loss: 25.687162399291992
training batch: 9
loss: 23.98557472229004
training batch: 10
loss: 24.34482192993164
training batch: 11
loss: 23.488889694213867
training batch: 12
loss: 31.1444091796875
training batch: 13
loss: 22.567628860473633
training batch: 14
loss: 26.867258071899414
training batch: 15
loss: 30.289730072021484
training batch: 16
loss: 25.78160858154297
training batch: 17
loss: 23.56550407409668
training batch: 18
loss: 32.91071701049805
training batch: 19
loss: 23.155900955200195
training batch: 20
loss: 22.736141204833984
training batch: 21
loss: 23.293399810791016
training batch

loss: 20.54965591430664
training batch: 186
loss: 21.55147361755371
training batch: 187
loss: 17.508148193359375
training batch: 188
loss: 20.669355392456055
training batch: 189
loss: 18.887487411499023
training batch: 190
loss: 19.058799743652344
training batch: 191
loss: 21.660707473754883
training batch: 192
loss: 17.664569854736328
training batch: 193
loss: 25.76201057434082
training batch: 194
loss: 16.43332290649414
training batch: 195
loss: 16.19330596923828
training batch: 196
loss: 17.35381507873535
training batch: 197
loss: 14.345337867736816
training batch: 198
loss: 30.362377166748047
training batch: 199
loss: 30.697816848754883
training batch: 200
loss: 14.158852577209473
training batch: 201
loss: 17.525663375854492
training batch: 202
loss: 27.865140914916992
training batch: 203
loss: 18.051931381225586
training batch: 204
loss: 19.731460571289062
training batch: 205
loss: 18.739948272705078
training batch: 206
loss: 21.433448791503906
training batch: 207
loss: 16.7643127

training batch: 369
loss: 22.528697967529297
training batch: 370
loss: 16.98255729675293
training batch: 371
loss: 19.59334373474121
training batch: 372
loss: 18.222740173339844
training batch: 373
loss: 16.681190490722656
training batch: 374
loss: 17.58414077758789
training batch: 375
loss: 22.108219146728516
training batch: 376
loss: 17.105899810791016
training batch: 377
loss: 18.613941192626953
training batch: 378
loss: 18.900550842285156
training batch: 379
loss: 20.335556030273438
training batch: 380
loss: 23.651813507080078
training batch: 381
loss: 15.285684585571289
training batch: 382
loss: 13.403733253479004
training batch: 383
loss: 14.181280136108398
training batch: 384
loss: 20.63881492614746
training batch: 385
loss: 16.865245819091797
training batch: 386
loss: 23.32706642150879
training batch: 387
loss: 16.142147064208984
training batch: 388
loss: 22.433135986328125
training batch: 389
loss: 12.974493980407715
training batch: 390
loss: 16.640085220336914
training batch:

loss: 14.667336463928223
training batch: 553
loss: 21.36893653869629
training batch: 554
loss: 19.26154136657715
training batch: 555
loss: 15.420910835266113
training batch: 556
loss: 12.080066680908203
training batch: 557
loss: 13.141280174255371
training batch: 558
loss: 15.516475677490234
training batch: 559
loss: 20.195114135742188
training batch: 560
loss: 23.175397872924805
training batch: 561
loss: 14.15195083618164
training batch: 562
loss: 20.390995025634766
training batch: 563
loss: 14.869572639465332
training batch: 564
loss: 27.879241943359375
training batch: 565
loss: 13.903349876403809
training batch: 566
loss: 20.48581314086914
training batch: 567
loss: 23.99469566345215
training batch: 568
loss: 15.956119537353516
training batch: 569
loss: 19.27959442138672
training batch: 570
loss: 23.552209854125977
training batch: 571
loss: 16.99195098876953
training batch: 572
loss: 21.111652374267578
training batch: 573
loss: 16.244443893432617
training batch: 574
loss: 12.54117488

training batch: 736
loss: 17.963626861572266
training batch: 737
loss: 14.71817398071289
training batch: 738
loss: 14.86097526550293
training batch: 739
loss: 13.459993362426758
training batch: 740
loss: 10.6502103805542
training batch: 741
loss: 9.553753852844238
training batch: 742
loss: 15.08631706237793
training batch: 743
loss: 13.945765495300293
training batch: 744
loss: 19.750537872314453
training batch: 745
loss: 18.60654067993164
training batch: 746
loss: 9.754679679870605
training batch: 747
loss: 12.847543716430664
training batch: 748
loss: 13.988996505737305
training batch: 749
loss: 15.28848934173584
training batch: 750
loss: 13.624580383300781
training batch: 751
loss: 15.663421630859375
training batch: 752
loss: 10.212027549743652
training batch: 753
loss: 13.497169494628906
training batch: 754
loss: 11.263381958007812
training batch: 755
loss: 14.004117965698242
training batch: 756
loss: 11.194244384765625
training batch: 757
loss: 18.026521682739258
training batch: 758

loss: 16.00389862060547
training batch: 920
loss: 21.69121742248535
training batch: 921
loss: 17.681310653686523
training batch: 922
loss: 13.097302436828613
training batch: 923
loss: 20.09868812561035
training batch: 924
loss: 15.404420852661133
training batch: 925
loss: 13.14616584777832
training batch: 926
loss: 12.818965911865234
training batch: 927
loss: 13.801538467407227
training batch: 928
loss: 12.42318344116211
training batch: 929
loss: 18.003496170043945
training batch: 930
loss: 14.988497734069824
training batch: 931
loss: 16.46489906311035
training batch: 932
loss: 20.121307373046875
training batch: 933
loss: 15.51551342010498
training batch: 934
loss: 15.720823287963867
training batch: 935
loss: 11.977351188659668
training batch: 936
loss: 18.53428077697754
training batch: 937
loss: 16.939979553222656
training batch: 938
loss: 15.363306045532227
training batch: 939
loss: 11.208531379699707
training batch: 940
loss: 11.177066802978516
training batch: 941
loss: 12.725880622

training batch: 1101
loss: 23.139299392700195
training batch: 1102
loss: 12.544316291809082
training batch: 1103
loss: 14.830817222595215
training batch: 1104
loss: 15.256254196166992
training batch: 1105
loss: 17.58188819885254
training batch: 1106
loss: 13.130270004272461
training batch: 1107
loss: 11.276809692382812
training batch: 1108
loss: 15.3273286819458
training batch: 1109
loss: 13.224203109741211
training batch: 1110
loss: 17.761917114257812
training batch: 1111
loss: 21.090303421020508
training batch: 1112
loss: 12.202629089355469
training batch: 1113
loss: 11.966109275817871
training batch: 1114
loss: 11.11622428894043
training batch: 1115
loss: 10.364911079406738
training batch: 1116
loss: 13.572330474853516
training batch: 1117
loss: 11.892731666564941
training batch: 1118
loss: 13.28028678894043
training batch: 1119
loss: 14.178081512451172
training batch: 1120
loss: 10.108481407165527
training batch: 1121
loss: 14.495643615722656
training batch: 1122
loss: 26.724939346

loss: 16.389909744262695
loss: 21.563934326171875
loss: 21.132232666015625
loss: 16.50819969177246
loss: 19.007259368896484
loss: 19.721858978271484
loss: 17.64324188232422
loss: 26.73457145690918
loss: 19.131616592407227
loss: 18.25959587097168
loss: 14.670434951782227
loss: 17.759227752685547
loss: 12.576420783996582
loss: 11.703911781311035
loss: 15.199956893920898
loss: 18.89332389831543
loss: 15.756871223449707
loss: 18.78363609313965
loss: 15.587141036987305
loss: 15.291585922241211
loss: 16.60714340209961
loss: 17.387645721435547
loss: 12.733396530151367
loss: 9.918303489685059
loss: 21.756492614746094
loss: 19.926843643188477
loss: 15.572040557861328
loss: 13.959986686706543
loss: 25.252426147460938
loss: 12.400129318237305
loss: 13.123283386230469
loss: 12.963125228881836
loss: 22.428115844726562
loss: 18.67385482788086
loss: 17.710132598876953
loss: 12.777389526367188
loss: 19.58942985534668
loss: 4.471694052219391
Time elapsed 102m 9s
valid Loss: 1.1182 Acc: 0.5932
Optimizer

training batch: 163
loss: 11.73717975616455
training batch: 164
loss: 13.735350608825684
training batch: 165
loss: 12.714531898498535
training batch: 166
loss: 14.172149658203125
training batch: 167
loss: 15.913678169250488
training batch: 168
loss: 20.986217498779297
training batch: 169
loss: 12.77017593383789
training batch: 170
loss: 15.615519523620605
training batch: 171
loss: 11.787825584411621
training batch: 172
loss: 10.774916648864746
training batch: 173
loss: 11.118422508239746
training batch: 174
loss: 7.1779327392578125
training batch: 175
loss: 12.064799308776855
training batch: 176
loss: 12.171817779541016
training batch: 177
loss: 10.737451553344727
training batch: 178
loss: 14.350667953491211
training batch: 179
loss: 13.0480318069458
training batch: 180
loss: 9.375332832336426
training batch: 181
loss: 12.175873756408691
training batch: 182
loss: 8.89855670928955
training batch: 183
loss: 9.009957313537598
training batch: 184
loss: 8.963339805603027
training batch: 185

loss: 12.32005500793457
training batch: 348
loss: 13.938251495361328
training batch: 349
loss: 12.224266052246094
training batch: 350
loss: 11.32674789428711
training batch: 351
loss: 11.098198890686035
training batch: 352
loss: 12.463406562805176
training batch: 353
loss: 22.62743377685547
training batch: 354
loss: 11.566463470458984
training batch: 355
loss: 15.545154571533203
training batch: 356
loss: 7.197902679443359
training batch: 357
loss: 10.567437171936035
training batch: 358
loss: 11.803709030151367
training batch: 359
loss: 12.729403495788574
training batch: 360
loss: 22.306516647338867
training batch: 361
loss: 14.029241561889648
training batch: 362
loss: 9.984901428222656
training batch: 363
loss: 10.614884376525879
training batch: 364
loss: 15.270330429077148
training batch: 365
loss: 14.660117149353027
training batch: 366
loss: 4.108043670654297
training batch: 367
loss: 8.287973403930664
training batch: 368
loss: 8.682121276855469
training batch: 369
loss: 13.924691200

loss: 12.281939506530762
training batch: 532
loss: 8.78018856048584
training batch: 533
loss: 6.961150169372559
training batch: 534
loss: 12.948448181152344
training batch: 535
loss: 13.51689338684082
training batch: 536
loss: 15.238245010375977
training batch: 537
loss: 13.002370834350586
training batch: 538
loss: 15.943593978881836
training batch: 539
loss: 15.695660591125488
training batch: 540
loss: 13.542417526245117
training batch: 541
loss: 13.592727661132812
training batch: 542
loss: 5.3039631843566895
training batch: 543
loss: 12.42711067199707
training batch: 544
loss: 17.34766387939453
training batch: 545
loss: 16.7669677734375
training batch: 546
loss: 17.44626808166504
training batch: 547
loss: 18.891029357910156
training batch: 548
loss: 10.818404197692871
training batch: 549
loss: 16.574647903442383
training batch: 550
loss: 14.713963508605957
training batch: 551
loss: 11.093128204345703
training batch: 552
loss: 9.09820556640625
training batch: 553
loss: 19.359249114990

loss: 18.30373764038086
training batch: 716
loss: 8.614450454711914
training batch: 717
loss: 16.11905860900879
training batch: 718
loss: 5.715277671813965
training batch: 719
loss: 16.69138526916504
training batch: 720
loss: 12.141897201538086
training batch: 721
loss: 9.56399154663086
training batch: 722
loss: 11.57707691192627
training batch: 723
loss: 9.986093521118164
training batch: 724
loss: 14.27908706665039
training batch: 725
loss: 14.637971878051758
training batch: 726
loss: 17.79862403869629
training batch: 727
loss: 9.604517936706543
training batch: 728
loss: 6.526686191558838
training batch: 729
loss: 7.706634998321533
training batch: 730
loss: 17.082855224609375
training batch: 731
loss: 14.67841625213623
training batch: 732
loss: 9.867231369018555
training batch: 733
loss: 7.245619773864746
training batch: 734
loss: 11.516864776611328
training batch: 735
loss: 10.984591484069824
training batch: 736
loss: 13.93133544921875
training batch: 737
loss: 10.85108757019043
trai

training batch: 900
loss: 8.470159530639648
training batch: 901
loss: 14.01887035369873
training batch: 902
loss: 10.08749771118164
training batch: 903
loss: 9.744464874267578
training batch: 904
loss: 11.019033432006836
training batch: 905
loss: 9.830982208251953
training batch: 906
loss: 14.887774467468262
training batch: 907
loss: 11.777033805847168
training batch: 908
loss: 14.493738174438477
training batch: 909
loss: 10.401707649230957
training batch: 910
loss: 10.851692199707031
training batch: 911
loss: 12.102266311645508
training batch: 912
loss: 6.5594587326049805
training batch: 913
loss: 9.923977851867676
training batch: 914
loss: 20.4314022064209
training batch: 915
loss: 4.740285396575928
training batch: 916
loss: 8.570390701293945
training batch: 917
loss: 11.387128829956055
training batch: 918
loss: 13.421825408935547
training batch: 919
loss: 12.723804473876953
training batch: 920
loss: 11.930896759033203
training batch: 921
loss: 13.972661972045898
training batch: 922


loss: 11.04462718963623
training batch: 1083
loss: 12.595905303955078
training batch: 1084
loss: 8.856222152709961
training batch: 1085
loss: 9.054952621459961
training batch: 1086
loss: 20.670326232910156
training batch: 1087
loss: 12.100349426269531
training batch: 1088
loss: 9.086579322814941
training batch: 1089
loss: 10.213047981262207
training batch: 1090
loss: 11.634917259216309
training batch: 1091
loss: 17.390426635742188
training batch: 1092
loss: 21.68939971923828
training batch: 1093
loss: 11.878605842590332
training batch: 1094
loss: 12.096702575683594
training batch: 1095
loss: 10.66447639465332
training batch: 1096
loss: 16.585683822631836
training batch: 1097
loss: 11.592592239379883
training batch: 1098
loss: 12.553245544433594
training batch: 1099
loss: 11.323709487915039
training batch: 1100
loss: 10.568306922912598
training batch: 1101
loss: 11.579878807067871
training batch: 1102
loss: 13.108783721923828
training batch: 1103
loss: 13.226829528808594
training batch:

loss: 9.515496253967285
loss: 11.19375228881836
loss: 16.70037269592285
loss: 17.906904220581055
loss: 14.578581809997559
loss: 19.828935623168945
loss: 27.900068283081055
loss: 9.82591438293457
loss: 11.139710426330566
loss: 14.365130424499512
loss: 10.804981231689453
loss: 11.168252944946289
loss: 14.233119010925293
loss: 18.432804107666016
loss: 17.02600860595703
loss: 17.128826141357422
loss: 10.210487365722656
loss: 8.663467407226562
loss: 9.39728832244873
loss: 14.625572204589844
loss: 3.596367597579956
loss: 7.9901580810546875
loss: 11.820087432861328
loss: 3.877650260925293
loss: 13.36899471282959
loss: 20.62299156188965
loss: 15.543375968933105
loss: 6.097235202789307
loss: 11.657087326049805
loss: 10.421759605407715
loss: 6.997313499450684
loss: 13.749722480773926
loss: 9.4407958984375
loss: 16.19952392578125
loss: 10.940629959106445
loss: 15.642273902893066
loss: 12.640092849731445
loss: 18.156816482543945
loss: 6.345635414123535
loss: 8.055890083312988
loss: 13.536846160888

training batch: 145
loss: 7.969077110290527
training batch: 146
loss: 14.22854232788086
training batch: 147
loss: 7.636595249176025
training batch: 148
loss: 6.4186320304870605
training batch: 149
loss: 5.37919282913208
training batch: 150
loss: 6.996083736419678
training batch: 151
loss: 10.448698997497559
training batch: 152
loss: 8.644837379455566
training batch: 153
loss: 10.876934051513672
training batch: 154
loss: 5.7399001121521
training batch: 155
loss: 8.70197582244873
training batch: 156
loss: 7.65027379989624
training batch: 157
loss: 30.422243118286133
training batch: 158
loss: 8.431421279907227
training batch: 159
loss: 10.932380676269531
training batch: 160
loss: 6.540602684020996
training batch: 161
loss: 7.795036792755127
training batch: 162
loss: 17.277698516845703
training batch: 163
loss: 7.04840612411499
training batch: 164
loss: 11.971572875976562
training batch: 165
loss: 12.288442611694336
training batch: 166
loss: 8.657454490661621
training batch: 167
loss: 8.29

loss: 10.94687557220459
training batch: 331
loss: 14.897769927978516
training batch: 332
loss: 18.758216857910156
training batch: 333
loss: 9.96860122680664
training batch: 334
loss: 8.722162246704102
training batch: 335
loss: 11.489087104797363
training batch: 336
loss: 8.438353538513184
training batch: 337
loss: 9.941637992858887
training batch: 338
loss: 14.150693893432617
training batch: 339
loss: 9.768178939819336
training batch: 340
loss: 11.80346965789795
training batch: 341
loss: 11.278983116149902
training batch: 342
loss: 15.549488067626953
training batch: 343
loss: 14.957433700561523
training batch: 344
loss: 11.317959785461426
training batch: 345
loss: 13.242778778076172
training batch: 346
loss: 11.699722290039062
training batch: 347
loss: 12.106325149536133
training batch: 348
loss: 11.718260765075684
training batch: 349
loss: 18.38355255126953
training batch: 350
loss: 6.721040725708008
training batch: 351
loss: 9.764030456542969
training batch: 352
loss: 13.934625625610

loss: 14.46070384979248
training batch: 516
loss: 7.755136489868164
training batch: 517
loss: 5.816028118133545
training batch: 518
loss: 13.168590545654297
training batch: 519
loss: 16.754558563232422
training batch: 520
loss: 13.384136199951172
training batch: 521
loss: 8.063023567199707
training batch: 522
loss: 8.873810768127441
training batch: 523
loss: 13.491902351379395
training batch: 524
loss: 14.09134578704834
training batch: 525
loss: 11.835134506225586
training batch: 526
loss: 13.48355484008789
training batch: 527
loss: 11.11889934539795
training batch: 528
loss: 10.625374794006348
training batch: 529
loss: 7.8227081298828125
training batch: 530
loss: 12.675217628479004
training batch: 531
loss: 12.925233840942383
training batch: 532
loss: 6.6265435218811035
training batch: 533
loss: 12.62117862701416
training batch: 534
loss: 11.248434066772461
training batch: 535
loss: 5.803254127502441
training batch: 536
loss: 6.505425453186035
training batch: 537
loss: 8.6638078689575

loss: 8.043715476989746
training batch: 701
loss: 6.874014377593994
training batch: 702
loss: 10.806127548217773
training batch: 703
loss: 11.124871253967285
training batch: 704
loss: 5.913473129272461
training batch: 705
loss: 9.546570777893066
training batch: 706
loss: 13.46871566772461
training batch: 707
loss: 5.000597953796387
training batch: 708
loss: 11.405814170837402
training batch: 709
loss: 9.269760131835938
training batch: 710
loss: 2.9997384548187256
training batch: 711
loss: 9.61774730682373
training batch: 712
loss: 6.454771995544434
training batch: 713
loss: 12.240571975708008
training batch: 714
loss: 11.248555183410645
training batch: 715
loss: 10.658366203308105
training batch: 716
loss: 10.199241638183594
training batch: 717
loss: 7.558446884155273
training batch: 718
loss: 8.98885440826416
training batch: 719
loss: 11.148770332336426
training batch: 720
loss: 16.351499557495117
training batch: 721
loss: 9.870199203491211
training batch: 722
loss: 17.020709991455078

training batch: 885
loss: 8.297076225280762
training batch: 886
loss: 12.897890090942383
training batch: 887
loss: 7.162349700927734
training batch: 888
loss: 7.704441070556641
training batch: 889
loss: 15.787281036376953
training batch: 890
loss: 11.590184211730957
training batch: 891
loss: 10.133354187011719
training batch: 892
loss: 13.137703895568848
training batch: 893
loss: 8.44951343536377
training batch: 894
loss: 10.038952827453613
training batch: 895
loss: 9.30411434173584
training batch: 896
loss: 8.323946952819824
training batch: 897
loss: 14.281220436096191
training batch: 898
loss: 7.34967041015625
training batch: 899
loss: 10.188851356506348
training batch: 900
loss: 10.1066312789917
training batch: 901
loss: 6.997499942779541
training batch: 902
loss: 10.372798919677734
training batch: 903
loss: 7.618124008178711
training batch: 904
loss: 7.243332386016846
training batch: 905
loss: 12.062018394470215
training batch: 906
loss: 11.467280387878418
training batch: 907
loss:

training batch: 1069
loss: 9.19843578338623
training batch: 1070
loss: 11.157155990600586
training batch: 1071
loss: 14.069208145141602
training batch: 1072
loss: 10.553524017333984
training batch: 1073
loss: 15.46757984161377
training batch: 1074
loss: 10.103829383850098
training batch: 1075
loss: 13.36059284210205
training batch: 1076
loss: 7.67812442779541
training batch: 1077
loss: 11.90479564666748
training batch: 1078
loss: 10.216846466064453
training batch: 1079
loss: 4.7452521324157715
training batch: 1080
loss: 7.402975082397461
training batch: 1081
loss: 20.53007698059082
training batch: 1082
loss: 9.539932250976562
training batch: 1083
loss: 14.82689380645752
training batch: 1084
loss: 12.891441345214844
training batch: 1085
loss: 12.7808837890625
training batch: 1086
loss: 15.368687629699707
training batch: 1087
loss: 11.193580627441406
training batch: 1088
loss: 13.525153160095215
training batch: 1089
loss: 7.814999580383301
training batch: 1090
loss: 7.007881164550781
tra

loss: 12.707271575927734
loss: 13.571386337280273
loss: 14.96927261352539
loss: 13.218232154846191
loss: 8.83934497833252
loss: 9.516663551330566
loss: 16.16805648803711
loss: 18.399580001831055
loss: 14.092866897583008
loss: 8.661556243896484
loss: 5.398309707641602
loss: 14.06759262084961
loss: 14.533527374267578
loss: 9.828156471252441
loss: 12.86407470703125
loss: 6.508637428283691
loss: 13.53652572631836
loss: 9.24994945526123
loss: 9.213696479797363
loss: 14.892698287963867
loss: 9.040246963500977
loss: 7.103085517883301
loss: 6.511462211608887
loss: 7.562855243682861
loss: 18.717742919921875
loss: 7.279775619506836
loss: 7.792666912078857
loss: 7.662753105163574
loss: 19.584041595458984
loss: 5.116283893585205
loss: 6.103470325469971
loss: 14.492260932922363
loss: 10.13575553894043
loss: 5.219133377075195
loss: 17.036163330078125
loss: 24.548851013183594
loss: 13.152303695678711
loss: 17.16376495361328
loss: 11.658858299255371
loss: 9.359258651733398
loss: 6.827568531036377
loss

In [38]:
val_acc_history, train_acc_history, valid_losses, train_losses

([tensor(0.5932, dtype=torch.float64),
  tensor(0.6958, dtype=torch.float64),
  tensor(0.7426, dtype=torch.float64)],
 [tensor(0.6001, dtype=torch.float64),
  tensor(0.7181, dtype=torch.float64),
  tensor(0.7460, dtype=torch.float64)],
 [1.1181672937925726, 0.8033024129029867, 0.7077667169610786],
 [1.0532620828007693, 0.7730043802002524, 0.6924602622743067])

In [None]:
class Lenet5(nn.Module):
    def __int__(self):
        super(Lenet5, self).__int__()
        
        sel.model = nn.Sequential(
            # x: [b, 3, 64, 64] => [b, 3, 64, 64]
            nn.Conv2d(in_channels=3, out_channels=6, kernel_size=3, stride=1, padding=1)
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0      
            #
            
            
            
        )