In [1]:
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from tensorboardX import SummaryWriter

# Check whether a GPU is available and pick the device used by the later cells.
devices_id = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
if devices_id.type == "cuda":
    # Pin the current CUDA device: fix bug for `ERROR: all tensors must be on devices[0]`.
    # Guarded because torch.cuda.set_device() only accepts CUDA devices, so calling
    # it with the CPU fallback selected above would raise.
    torch.cuda.set_device(devices_id)

# Create Tensorboard SummaryWriter instances (one log directory per model).
writer_deep = SummaryWriter('./summary/deep_without_activation')
writer_shallow = SummaryWriter('./summary/shallow_without_activation')

# Step 1. Load Dataset
# download=False: nothing is fetched here, so MNIST must already exist under ../data.
train_dataset = dsets.MNIST(
    root="../data", train=True, transform=transforms.ToTensor(), download=False
)
test_dataset = dsets.MNIST(
    root="../data", train=False, transform=transforms.ToTensor(), download=False
)

# Step 2. Make Dataset Iterable
batch_size = 100
# Training batches are reshuffled on every pass; the test order stays fixed.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset, batch_size=batch_size, shuffle=True
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset, batch_size=batch_size, shuffle=False
)

In [2]:
# Step 3. Create Model Class
class Deep_LogisticRegression(torch.nn.Module):
    """Stack of three linear layers applied back to back, with no activation between them.

    Layer widths: input_dim -> 300 -> int(input_dim / 4) -> output_dim.

    Args:
        input_dim: Number of features in each input row.
        output_dim: Number of values produced for each input row.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        first_width = 300
        second_width = int(input_dim / 4)  # 196 when input_dim is 784
        self.linear1 = torch.nn.Linear(input_dim, first_width)  # input_dim x 300
        self.linear2 = torch.nn.Linear(first_width, second_width)  # 300 x second_width
        self.linear3 = torch.nn.Linear(second_width, output_dim)  # second_width x output_dim

    def forward(self, x):
        """Run x through linear1, linear2 and linear3 in order and return the result."""
        return self.linear3(self.linear2(self.linear1(x)))


# Step 3. Create Model Class
class Shallow_LogisticRegression(torch.nn.Module):
    """Single linear layer mapping input_dim features straight to output_dim values.

    Args:
        input_dim: Number of features in each input row.
        output_dim: Number of values produced for each input row.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.linear1 = torch.nn.Linear(in_features=input_dim, out_features=output_dim)

    def forward(self, x):
        """Return the output of the only linear layer applied to x."""
        return self.linear1(x)
        

In [3]:
epochs = 30
input_dim = 784
output_dim = 10
# [test] What if MSE were used as the loss function?
# output_dim = 1
lr_rate = 0.01

# Step 4. Instantiate Model Class
# nn.DataParallel takes a list of devices, so a single device is wrapped in a list.
# isinstance() is required here: `devices_id == type([])` would compare the value
# with the list class itself and could never be true.
device_ids = devices_id if isinstance(devices_id, list) else [devices_id]
# Move the wrapped models to the first listed device. Unlike `.cuda()`, `.to()`
# also works when that device is the CPU.
primary_device = device_ids[0]

model_deep = nn.DataParallel(
    Deep_LogisticRegression(input_dim, output_dim), device_ids=device_ids
).to(primary_device)

model_shallow = nn.DataParallel(
    Shallow_LogisticRegression(input_dim, output_dim), device_ids=device_ids
).to(primary_device)

# Step 5. Instantiate Loss Class
criterion = torch.nn.CrossEntropyLoss()  # computes softmax and then the cross entropy
# Step 6. Instantiate Optimizer Class
# Each model gets its own SGD optimizer with the same learning rate.
optimizer_deep = torch.optim.SGD(model_deep.parameters(), lr=lr_rate)
optimizer_shallow = torch.optim.SGD(model_shallow.parameters(), lr=lr_rate)

In [4]:
# Step 7. Train Model
# Fetch one arbitrary batch of training images so TensorBoard can trace both graphs.
# next(iter(...)) is the iterator protocol; DataLoader iterators no longer expose `.next()`.
sample_images, _ = next(iter(train_loader))
sample_images = sample_images.view(-1, 28 * 28).to(devices_id)
writer_deep.add_graph(model_deep, sample_images)
writer_shallow.add_graph(model_shallow, sample_images)

loss_deep = 0
loss_shallow = 0
total_iter = 0

steps_per_epoch = len(train_loader)
# The loss of the final 10 training steps is deliberately not logged (same cut-off
# as before, but derived from the loader length instead of a hard-coded 600).
last_logged_step = steps_per_epoch * int(epochs) - 10
eval_every = 200  # evaluate on the test set every 200 training steps of an epoch

try:
    for epoch in range(int(epochs)):
        for i, (images, labels) in enumerate(train_loader):
            # Flatten each 28x28 image into a 784-long row and move the batch to the device.
            images = images.view(-1, 28 * 28).to(devices_id)
            labels = labels.to(devices_id)

            optimizer_deep.zero_grad()
            optimizer_shallow.zero_grad()
            outputs_deep = model_deep(images)
            outputs_shallow = model_shallow(images)
            # Calc loss
            loss_deep = criterion(outputs_deep, labels)
            loss_shallow = criterion(outputs_shallow, labels)
            # Back-propagation
            loss_deep.backward()
            loss_shallow.backward()
            # Updating weights
            optimizer_deep.step()
            optimizer_shallow.step()

            total_iter += 1
            if total_iter < last_logged_step:
                # .item() logs a plain Python float rather than a tensor.
                writer_deep.add_scalar('Train/Loss', loss_deep.item(), total_iter)
                writer_shallow.add_scalar('Train/Loss', loss_shallow.item(), total_iter)

            # `i + 1` counts the steps done in this epoch; no variable named `iter`
            # is used so the builtin stays callable when the cell is re-run.
            if (i + 1) % eval_every == 0:
                # calculate Accuracy
                correct_deep = 0
                correct_shallow = 0
                total = 0

                # No gradients are needed for evaluation; separate loop variables
                # keep the current training batch from being overwritten.
                with torch.no_grad():
                    for test_images, test_labels in test_loader:
                        test_images = test_images.view(-1, 28 * 28).to(devices_id)

                        test_outputs_deep = model_deep(test_images)
                        test_outputs_shallow = model_shallow(test_images)

                        _, predicted_deep = torch.max(test_outputs_deep, 1)
                        _, predicted_shallow = torch.max(test_outputs_shallow, 1)
                        total += test_labels.size(0)
                        # test_labels stay on the CPU, so bring the predictions back
                        # to the CPU before comparing.
                        predicted_deep = predicted_deep.cpu()
                        predicted_shallow = predicted_shallow.cpu()

                        # .item() accumulates Python ints instead of 0-dim tensors.
                        correct_deep += (predicted_deep == test_labels).sum().item()
                        correct_shallow += (predicted_shallow == test_labels).sum().item()

                accuracy_deep = 100 * correct_deep / total
                accuracy_shallow = 100 * correct_shallow / total

                print(
                    f"[Deep] [Epoch {epoch}] [Iteration: {i}/{len(train_loader)}] [Loss: {loss_deep.item():.3f}] [Accuracy: {accuracy_deep:.2f}]"
                )
                print(
                    f"[Shal] [Epoch {epoch}] [Iteration: {i}/{len(train_loader)}] [Loss: {loss_shallow.item():.3f}] [Accuracy: {accuracy_shallow:.2f}]"
                )
finally:
    # Close the writers even if training is interrupted or fails part-way.
    writer_deep.close()
    writer_shallow.close()

[Deep] [Epoch 0] [Iteration: 199/600] [Loss: 1.869] [Accuracy: 72.24]
[Shal] [Epoch 0] [Iteration: 199/600] [Loss: 1.284] [Accuracy: 80.36]
[Deep] [Epoch 0] [Iteration: 399/600] [Loss: 1.148] [Accuracy: 77.10]
[Shal] [Epoch 0] [Iteration: 399/600] [Loss: 0.933] [Accuracy: 84.13]
[Deep] [Epoch 0] [Iteration: 599/600] [Loss: 0.840] [Accuracy: 82.89]
[Shal] [Epoch 0] [Iteration: 599/600] [Loss: 0.841] [Accuracy: 85.89]
[Deep] [Epoch 1] [Iteration: 199/600] [Loss: 0.589] [Accuracy: 84.89]
[Shal] [Epoch 1] [Iteration: 199/600] [Loss: 0.629] [Accuracy: 86.49]
[Deep] [Epoch 1] [Iteration: 399/600] [Loss: 0.699] [Accuracy: 86.38]
[Shal] [Epoch 1] [Iteration: 399/600] [Loss: 0.757] [Accuracy: 87.18]
[Deep] [Epoch 1] [Iteration: 599/600] [Loss: 0.446] [Accuracy: 87.47]
[Shal] [Epoch 1] [Iteration: 599/600] [Loss: 0.504] [Accuracy: 87.51]
[Deep] [Epoch 2] [Iteration: 199/600] [Loss: 0.463] [Accuracy: 88.41]
[Shal] [Epoch 2] [Iteration: 199/600] [Loss: 0.549] [Accuracy: 87.76]
[Deep] [Epoch 2] [It