In [1]:
from tqdm import tqdm
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import numba
from dataset.dataloader import *
#from vgg8 import *

USE_CPU_ONLY = False

In [2]:
class torch_VGG8(nn.Module):
    def __init__(self):
        super(torch_VGG8, self,).__init__()
        # Layer 1 (B, 1, 28, 28) -> (B, 32, 28, 28)
        self.Layer1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1),
            nn.ReLU(True)
        )
        # Layer 2 (B, 32, 28, 28) -> (B, 64, 14, 14)
        self.Layer2 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        # Layer 3 (B, 64, 14, 14) -> (B, 64, 14, 14)
        self.Layer3 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(True)
        )
        # Layer 4 (B, 64, 14, 14) -> (B, 128, 7, 7)
        self.Layer4 =nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # Layer 5 (B, 128, 7, 7) -> (B, 256, 7, 7)
        self.Layer5 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(True)
        )
        # Layer 6 (B, 256, 7, 7) -> (B, 256, 7, 7)
        self.Layer6 = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(True)
        )
        # Layer 7 (B, 256*7*7) -> (B, 256)
        self.Layer7 = nn.Sequential(
            nn.Linear(in_features=256*7*7, out_features=256, bias=True,),
            nn.ReLU(True)
        )
        # Layer 8 (B, 256) -> (B, 10)
        self.Layer8 = nn.Sequential(
            nn.Linear(in_features=256, out_features=10, bias=True,),
        )

    def forward(self, x):
        # L1~L6: Conv + ReLU + (MaxPool)
        x = self.Layer1(x)
        x = self.Layer2(x)
        x = self.Layer3(x)
        x = self.Layer4(x)
        x = self.Layer5(x)
        x = self.Layer6(x)
        # L7: FC + ReLU
        x = x.view(x.size(0), -1)
        x = self.Layer7(x)
        # L8: FC
        x = self.Layer8(x)
        return x

In [3]:
if (not USE_CPU_ONLY and torch.cuda.is_available()):
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

torch_vgg8 = torch_VGG8().to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.SGD(torch_vgg8.parameters(), lr=0.01, momentum=0.9)

In [4]:
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

In [5]:
print(t_train.shape)
batch_size = 150
for epoch in range(10):
    running_loss = 0.0
    accuracy = 0.0
    train_size = x_train.shape[0]
    for i in tqdm(range(0, train_size, batch_size)):
        x = torch.Tensor(x_train[i:i+batch_size]).reshape(batch_size, 1, 28, 28).to(device)   # B, C, H, W
        labels = torch.Tensor(t_train[i:i+batch_size]).to(device)
        optimizer.zero_grad()

        outputs = torch_vgg8(x)
        labels = labels.reshape_as(outputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        #if ((train_size / batch_size) % 10 == 0):
    print('[%d] loss: %.3f' %
            (epoch + 1, running_loss / train_size))
    running_loss = 0.0

(60000, 10)


100%|██████████| 400/400 [00:16<00:00, 24.73it/s]


[1] loss: 92.097


100%|██████████| 400/400 [00:15<00:00, 25.24it/s]


[2] loss: 92.089


100%|██████████| 400/400 [00:16<00:00, 24.98it/s]


[3] loss: 92.078


 22%|██▎       | 90/400 [00:03<00:12, 24.91it/s]


KeyboardInterrupt: 

In [None]:
print(t_train.shape)
batch_size = 200
total_correct = 0
with torch.no_grad():
    running_loss = 0.0
    accuracy = 0.0
    test_size = x_test.shape[0]
    for i in range(0, test_size, batch_size):
        x = torch.Tensor(x_test[i:i+batch_size]).reshape(batch_size, 1, 28, 28).to(device)   # B, C, H, W
        labels = torch.Tensor(t_test[i:i+batch_size]).to(device)

        outputs = torch_vgg8(x)
        labels = labels.reshape_as(outputs)
        c = (torch.argmax(labels, 1) == torch.argmax(outputs, 1)).squeeze()
        total_correct += torch.sum(c)

            #print()
print(f"Accuracy:{total_correct/test_size}")


(60000, 10)
Accuracy:0.9646000266075134
