In [1]:
import sklearn.datasets
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt

mnist = sklearn.datasets.fetch_openml('mnist_784', data_home="mnist_784")
train_X = torch.tensor(mnist.data[:60000], dtype=torch.float) / 255
train_Y = torch.tensor([int(x) for x in mnist.target[:60000]])
val_X = torch.tensor(mnist.data[60000:], dtype=torch.float) / 255
val_Y = torch.tensor([int(x) for x in mnist.target[60000:]])

In [3]:
from collections import namedtuple

train_X = train_X.view(60000, 1, 28, 28)
val_X = val_X.view(10000, 1, 28, 28)

batch_size = 100
lr = 0.0001
epoch = 100
train_X = train_X.view(batch_size, -1, 28, 28)
train_Y = train_Y.view(batch_size, -1)

LayerBlock = namedtuple('LayerBlock', ['num_repeats', 'num_filters', 'bottleneck_size'])
blocks = [LayerBlock(3, 128, 32), LayerBlock(3, 256, 64), LayerBlock(3, 512, 128), LayerBlock(3, 1024, 256)]

class Layers(nn.Module):
    def __init__(self, filter, bottleneck):
        super(Layers, self).__init__()
        self.layers = nn.Sequential(
            nn.Conv2d(filter, bottleneck, kernel_size=(1, 1)),
            nn.ReLU(),
            nn.Conv2d(bottleneck, bottleneck, kernel_size=(3, 3), padding=1),
            nn.ReLU(),
            nn.Conv2d(bottleneck, filter, kernel_size=(1, 1)),
            nn.ReLU()
        )
        self.upscale = nn.Conv2d(filter, filter*2, kernel_size=(1, 1), bias=False)

class ResNet(nn.Module):
    def __init__(self, block):
        super(ResNet, self).__init__()
        self.ready = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=(8, 8), stride=2, padding=2),#14
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(2, 2), stride=2, padding=1),  #7, 7
            nn.Conv2d(64, block[0].num_filters, kernel_size=(1, 1)) #7, 7
        )
        self.layer1 = Layers(block[0].num_filters, block[0].bottleneck_size)
        self.layer2 = Layers(block[1].num_filters, block[1].bottleneck_size)
        self.layer3 = Layers(block[2].num_filters, block[2].bottleneck_size)
        self.linear = nn.Linear(1024, 10)
  
    def forward(self, x):
        out = self.ready(x)
        out1 = self.layer1.layers(out)
        output = out + out1
        out = self.layer1.upscale(output)
        out2 = self.layer2.layers(out)
        output = out + out2
        out = self.layer2.upscale(output)
        out3 = self.layer3.layers(out)
        output = out + out3
        out = self.layer3.upscale(output)
        out = nn.functional.avg_pool2d(out, kernel_size=(out.size()[2], out.size()[3]))
        out = torch.flatten(out, start_dim=1)
        out = self.linear(out)
        return out

model = ResNet(blocks)
model = model.cuda()
loss = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=lr)

for ep in range(epoch):
    print('epoch : ', ep+1)
    avg_cost = 0
    for batch in range(batch_size):
        optimizer.zero_grad()
        X = train_X[batch]
        X = X.unsqueeze(1).cuda()
        y_pred = model.forward(X).cuda()
        cost = loss(y_pred, train_Y[batch].cuda())
        cost.backward()
        optimizer.step()
        avg_cost += cost
        torch.cuda.empty_cache()
    print(avg_cost / batch_size)

epoch :  1
tensor(2.0786, device='cuda:0', grad_fn=<DivBackward0>)
epoch :  2
tensor(1.7401, device='cuda:0', grad_fn=<DivBackward0>)
epoch :  3
tensor(1.3541, device='cuda:0', grad_fn=<DivBackward0>)
epoch :  4
tensor(0.8524, device='cuda:0', grad_fn=<DivBackward0>)
epoch :  5
tensor(0.5628, device='cuda:0', grad_fn=<DivBackward0>)
epoch :  6
tensor(0.4400, device='cuda:0', grad_fn=<DivBackward0>)
epoch :  7
tensor(0.3769, device='cuda:0', grad_fn=<DivBackward0>)
epoch :  8
tensor(0.3325, device='cuda:0', grad_fn=<DivBackward0>)
epoch :  9
tensor(0.2994, device='cuda:0', grad_fn=<DivBackward0>)
epoch :  10
tensor(0.2732, device='cuda:0', grad_fn=<DivBackward0>)
epoch :  11
tensor(0.2513, device='cuda:0', grad_fn=<DivBackward0>)
epoch :  12
tensor(0.2329, device='cuda:0', grad_fn=<DivBackward0>)
epoch :  13
tensor(0.2171, device='cuda:0', grad_fn=<DivBackward0>)
epoch :  14
tensor(0.2033, device='cuda:0', grad_fn=<DivBackward0>)
epoch :  15
tensor(0.1910, device='cuda:0', grad_fn=<DivB

In [28]:
T, F = 0, 0
for i in range(batch_size):
    X = train_X[i].unsqueeze(1).cuda()
    y_pred = model.forward(X).argmax(axis=1)
    Y = train_Y[i].cuda()
    for j in range(len(y_pred)):
        if y_pred[j] == Y[j]:
            T += 1
        else:
            F += 1
    torch.cuda.empty_cache()
print("Test정확도 : ",T / (T+F) * 100)

Test정확도 :  98.26333333333334


In [29]:
val_X = val_X.view(batch_size, -1, 28, 28)
val_Y = val_Y.view(batch_size, -1)
T, F = 0, 0
for i in range(batch_size):
    test_X = val_X[i].unsqueeze(1).cuda()
    test_pred = model.forward(test_X).argmax(axis=1)
    test_Y = val_Y[i].cuda()
    for j in range(len(test_pred)):
        if test_pred[j] == test_Y[j]:
            T += 1
        else:
            F += 1
    torch.cuda.empty_cache()
print("Validation정확도 : ", T / (T+F) * 100)

Validation정확도 :  98.00999999999999
