In [1]:
from tqdm import tqdm
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import numba
from dataset.dataloader import *
#from vgg8 import *


In [17]:
class torch_VGG8(nn.Module):
    """VGG-style classifier: six 3x3 convolution layers followed by two linear layers.

    With the default arguments the network maps a (B, 1, 28, 28) batch to
    (B, 10) raw class scores (logits); no softmax is applied in ``forward``.
    The first linear layer is sized for 7x7 feature maps, i.e. 28x28 inputs
    after the two 2x2 max-pooling stages.

    Args:
        in_channels: number of channels of the input images. Defaults to 1.
        num_classes: number of scores produced per sample. Defaults to 10.
    """

    def __init__(self, in_channels=1, num_classes=10):
        super().__init__()
        # Layer 1 (B, in_channels, 28, 28) -> (B, 32, 28, 28)
        self.Layer1 = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=32, kernel_size=3, padding=1),
            nn.ReLU(True)
        )
        # Layer 2 (B, 32, 28, 28) -> (B, 64, 14, 14)
        self.Layer2 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        # Layer 3 (B, 64, 14, 14) -> (B, 64, 14, 14)
        self.Layer3 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(True)
        )
        # Layer 4 (B, 64, 14, 14) -> (B, 128, 7, 7)
        self.Layer4 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # Layer 5 (B, 128, 7, 7) -> (B, 256, 7, 7)
        self.Layer5 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(True)
        )
        # Layer 6 (B, 256, 7, 7) -> (B, 256, 7, 7)
        self.Layer6 = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(True)
        )
        # Layer 7 (B, 256*7*7) -> (B, 256)
        self.Layer7 = nn.Sequential(
            nn.Linear(in_features=256*7*7, out_features=256, bias=True),
            nn.ReLU(True)
        )
        # Layer 8 (B, 256) -> (B, num_classes)
        self.Layer8 = nn.Linear(in_features=256, out_features=num_classes, bias=True)

    def forward(self, x):
        """Run the network on ``x`` of shape (B, in_channels, 28, 28).

        Returns:
            Tensor of shape (B, num_classes) holding unnormalised class scores.
        """
        x = self.Layer1(x)
        x = self.Layer2(x)
        x = self.Layer3(x)
        x = self.Layer4(x)
        x = self.Layer5(x)
        x = self.Layer6(x)
        # flatten() copies when needed, so non-contiguous activations
        # (e.g. channels_last inputs) do not raise the way view() would.
        x = x.flatten(start_dim=1)
        x = self.Layer7(x)
        x = self.Layer8(x)
        return x

In [18]:
# Train on the GPU when CUDA is usable, otherwise stay on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# The model and the loss module are moved to the selected device.
torch_vgg8 = torch_VGG8().to(device)
criterion = nn.CrossEntropyLoss().to(device)

# Plain SGD with momentum over every parameter of the model.
optimizer = optim.SGD(
    torch_vgg8.parameters(),
    lr=0.01,
    momentum=0.9,
)

In [19]:
# Load the MNIST train/test splits as (inputs, labels) pairs.
# load_mnist is not defined in this notebook; presumably it is provided by the
# `from dataset.dataloader import *` import — confirm.
# With one_hot_label=True the training labels are one-hot rows: the recorded
# output of the training cell prints t_train.shape as (60000, 10).
# NOTE(review): normalize=True presumably rescales pixel values — confirm in load_mnist.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

In [29]:
# Train torch_vgg8 on the MNIST training split with mini-batch SGD and print
# the mean per-batch loss after every epoch.
print(t_train.shape)
batch_size = 10
num_epochs = 10
train_size = x_train.shape[0]
# Ceil division: the final batch may hold fewer than batch_size samples.
num_batches = (train_size + batch_size - 1) // batch_size

torch_vgg8.train()
for epoch in range(num_epochs):
    running_loss = 0.0
    for i in tqdm(range(0, train_size, batch_size)):
        # -1 lets the batch dimension follow the slice length, so a short
        # final batch does not break the reshape.  Layout is B, C, H, W.
        x = torch.as_tensor(x_train[i:i+batch_size], dtype=torch.float32).reshape(-1, 1, 28, 28).to(device)
        # t_train is one-hot (loaded with one_hot_label=True); CrossEntropyLoss
        # gives the same loss for class indices and handles them more efficiently.
        labels = torch.as_tensor(t_train[i:i+batch_size]).argmax(dim=1).to(device)

        # Gradients accumulate across backward() calls; clear them every step
        # so each update uses only the current batch.
        optimizer.zero_grad()
        outputs = torch_vgg8(x)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    # Average over the number of batches actually processed in this epoch.
    print('[%d] loss: %.3f' %
            (epoch + 1, running_loss / num_batches))

(60000, 10)


 13%|█▎        | 800/6000 [00:05<00:34, 150.77it/s]

In [75]:
# NOTE(review): this cell is unfinished. The `grad =` assignment at the bottom has
# no right-hand side, so the cell fails with the SyntaxError recorded in its output
# and none of the statements below are executed.
# Load model
# NOTE(review): VGG8 is not defined in the visible cells; presumably it comes from the
# commented-out `from vgg8 import *` import — confirm before running.
model = VGG8()

# Load MNIST Dataset
# Rebinds x_train / t_train / x_test / t_test with the same arguments as the earlier load cell.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

# Hyper Params
iters_num = 10000  # number of mini-batch iterations of the loop below
train_size  = x_train.shape[0]
batch_size = 100
learning_rate = 0.1  # not used in the visible part of this cell

for i in range(iters_num):
    # mini batch
    # batch_size random row indices in [0, train_size); np.random.choice samples
    # with replacement by default, so a batch can contain duplicates.
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Calc grads
    # TODO: the gradient computation was never written; the right-hand side is missing.
    grad = 


SyntaxError: invalid syntax (828416488.py, line 20)