In [1]:
import sklearn.datasets
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt

# Download (or load from the local "mnist_784" cache directory) the MNIST dataset from OpenML.
mnist = sklearn.datasets.fetch_openml('mnist_784', data_home="mnist_784")

# Depending on the scikit-learn version, `mnist.data` / `mnist.target` are either
# NumPy arrays or a pandas DataFrame / Series. torch.tensor() cannot consume a
# DataFrame directly, so normalize both to plain ndarrays first.
mnist_pixels = np.asarray(mnist.data)
# Targets are converted element-wise to integers (same result as int(x) per label).
mnist_labels = np.asarray(mnist.target).astype(np.int64)

# First 60000 rows are used for training, the remainder for validation.
# Pixel values are divided by 255 after conversion to float32.
train_X = torch.tensor(mnist_pixels[:60000], dtype=torch.float) / 255
train_Y = torch.tensor(mnist_labels[:60000])
val_X = torch.tensor(mnist_pixels[60000:], dtype=torch.float) / 255
val_Y = torch.tensor(mnist_labels[60000:])

In [2]:
from collections import namedtuple

# Training hyperparameters.
# NOTE: despite its name, `batch_size` is used below as the *leading* dimension of
# the reshaped tensors, i.e. it is the number of chunks the data is split into.
batch_size, lr, epoch = 500, 1e-4, 10

# Validation images as (N, C, H, W).
val_X = val_X.view(10000, 1, 28, 28)

# Training images: first to (N, C, H, W), then regrouped into `batch_size` chunks,
# giving (batch_size, samples_per_chunk, 28, 28); labels are chunked the same way.
train_X = train_X.view(60000, 1, 28, 28).view(batch_size, -1, 28, 28)
train_Y = train_Y.view(batch_size, -1)

# Per-stage configuration; each bottleneck width is a quarter of the stage width.
LayerBlock = namedtuple('LayerBlock', 'num_repeats num_filters bottleneck_size')
blocks = [LayerBlock(3, width, width // 4) for width in (128, 256, 512, 1024)]

class Layers(nn.Module):
    """Bottleneck residual stage followed by a channel-doubling 1x1 convolution.

    Attributes:
        layers: 1x1 conv (filter -> bottleneck), 3x3 conv (padding 1), and
            1x1 conv (bottleneck -> filter), each followed by ReLU. Spatial
            size and channel count of the input are preserved.
        upscale: bias-free 1x1 conv mapping `filter` to `filter * 2` channels.

    Args:
        filter: number of input channels (and output channels of `layers`).
        bottleneck: number of channels inside the bottleneck.
    """

    def __init__(self, filter, bottleneck):
        super(Layers, self).__init__()
        self.layers = nn.Sequential(
            nn.Conv2d(filter, bottleneck, kernel_size=(1, 1)),
            nn.ReLU(),
            nn.Conv2d(bottleneck, bottleneck, kernel_size=(3, 3), padding=1),
            nn.ReLU(),
            nn.Conv2d(bottleneck, filter, kernel_size=(1, 1)),
            nn.ReLU()
        )
        self.upscale = nn.Conv2d(filter, filter*2, kernel_size=(1, 1), bias=False)

    def forward(self, x):
        """Apply the residual connection, then double the channel count.

        Args:
            x: tensor with `filter` channels in dimension 1.

        Returns:
            `upscale(x + layers(x))`, a tensor with `filter * 2` channels and
            the same spatial size as `x`.
        """
        return self.upscale(x + self.layers(x))

class ResNet(nn.Module):
    """Three-stage residual classifier producing 10 logits per image.

    A stem (7x7 conv, ReLU, 3x3 max-pool with stride 1, 1x1 conv) lifts the
    single-channel input to `block[0].num_filters` channels. Three `Layers`
    stages follow; each adds a residual connection and then doubles the
    channel count. No layer changes the spatial size, so the flattened feature
    vector has `block[2].num_filters * 2 * image_size * image_size` entries.

    Args:
        block: sequence of at least three entries exposing `num_filters` and
            `bottleneck_size`. Only the first three entries are read, and
            `num_repeats` is not used. Each stage doubles its channel count,
            so `block[i + 1].num_filters` must equal `2 * block[i].num_filters`.
        image_size: height/width of the square input images (default 28).

    Raises:
        ValueError: if fewer than three blocks are given or consecutive
            `num_filters` values do not double.
    """

    def __init__(self, block, image_size=28):
        super(ResNet, self).__init__()
        if len(block) < 3:
            raise ValueError('ResNet needs at least 3 block descriptions, got %d' % len(block))
        # Fail early with a clear message instead of a shape mismatch inside forward().
        for i in (0, 1):
            if block[i + 1].num_filters != 2 * block[i].num_filters:
                raise ValueError(
                    'block[%d].num_filters must be twice block[%d].num_filters' % (i + 1, i)
                )
        self.ready = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=(7, 7), padding=3),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(3, 3), stride=1, padding=1),
            nn.Conv2d(64, block[0].num_filters, kernel_size=(1, 1))
        )
        self.layer1 = Layers(block[0].num_filters, block[0].bottleneck_size)
        self.layer2 = Layers(block[1].num_filters, block[1].bottleneck_size)
        self.layer3 = Layers(block[2].num_filters, block[2].bottleneck_size)
        # The last stage's upscale conv emits 2 * num_filters channels.
        self.linear = nn.Linear(block[2].num_filters * 2 * image_size * image_size, 10)

    def forward(self, x):
        """Compute class logits.

        Args:
            x: tensor of shape (N, 1, image_size, image_size).

        Returns:
            Tensor of shape (N, 10).
        """
        out = self.ready(x)
        for stage in (self.layer1, self.layer2, self.layer3):
            # Residual sum first, then the 1x1 conv that doubles the channels.
            out = stage.upscale(out + stage.layers(out))
        out = torch.flatten(out, start_dim=1)
        return self.linear(out)

# Pick the compute device explicitly. nn.DataParallel expects the wrapped module's
# parameters to already live on its first device when more than one GPU is used.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = ResNet(blocks).to(device)
model = nn.DataParallel(model)
loss = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=lr)

for ep in range(epoch):
    print('epoch : ', ep+1)
    avg_cost = 0.0
    # `batch_size` indexes the leading dimension of train_X / train_Y here,
    # i.e. it acts as the number of mini-batches per epoch.
    for batch in range(batch_size):
        optimizer.zero_grad()
        # (samples, 28, 28) -> (samples, 1, 28, 28): add the channel dimension.
        X = train_X[batch].unsqueeze(1)
        # Call the module itself rather than .forward() so hooks are honoured.
        y_pred = model(X)
        # Targets must be on the same device as the logits.
        cost = loss(y_pred, train_Y[batch].to(y_pred.device))
        cost.backward()
        optimizer.step()
        # .item() yields a Python float; summing the tensor itself would keep
        # autograd history alive across iterations.
        avg_cost += cost.item()
    print(avg_cost / batch_size)

epoch :  1


RuntimeError: CUDA out of memory. Tried to allocate 184.00 MiB (GPU 0; 2.00 GiB total capacity; 1.18 GiB already allocated; 51.28 MiB free; 1.20 GiB reserved in total by PyTorch)

In [11]:
# Split the validation images into `batch_size` chunks: (batch_size, per_chunk, 28, 28).
val_X = val_X.view(batch_size, -1, 28, 28)
# Number of samples in each chunk; used to locate the matching slice of val_Y.
samples_per_chunk = val_X.shape[1]

T, F = 0, 0  # counts of correct / incorrect predictions
# The model contains only conv / ReLU / max-pool / linear layers, so eval() does not
# change its outputs, but it keeps evaluation correct if mode-dependent layers are added.
model.eval()
# Inference only: disable autograd so no graph (and its activations) is kept.
with torch.no_grad():
    for i in range(batch_size):
        # (per_chunk, 28, 28) -> (per_chunk, 1, 28, 28): add the channel dimension.
        test_X = val_X[i].unsqueeze(1)
        test_pred = model(test_X).argmax(dim=1)

        start = samples_per_chunk * i
        labels = val_Y[start:start + samples_per_chunk].to(test_pred.device)
        correct = int((test_pred == labels).sum())
        T += correct
        F += len(test_pred) - correct
print(T / (T+F))

0.987
