In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import argparse
import numpy as np

## Data Preparation

In [2]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainset, valset = torch.utils.data.random_split(trainset, [40000, 10000])
testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)


trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2)
valloader = torch.utils.data.DataLoader(valset, batch_size=4, 
                                        shuffle=False)
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                         shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


In [3]:
import matplotlib.pyplot as plt
import numpy as np

# functions to show an image


def imshow(img):
    img = img / 2 + 0.5     # unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()


# get some random training images
dataiter = iter(trainloader)
images, labels = dataiter.next()

# show images
imshow(torchvision.utils.make_grid(images))
# print labels
print(' '.join('%5s' % classes[labels[j]] for j in range(4)))

<Figure size 640x480 with 1 Axes>

plane   cat plane  ship


## Model Architecture

In [4]:
class MLP(nn.Module):
    def __init__(self, in_dim, out_dim, hid_dim, n_layer, act):
        super(MLP, self).__init__()
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.hid_dim = hid_dim
        self.n_layer = n_layer
        self.act = act
        
        self.fc = nn.Linear(self.in_dim, self.hid_dim)
        self.linears = nn.ModuleList()
        
        for i in range(self.n_layer-1):
            self.linears.append(nn.Linear(self.hid_dim, self.hid_dim))
        self.fc2 = nn.Linear(self.hid_dim, self.out_dim)
        
        if self.act == 'relu':
            self.act = nn.ReLU()
          
    def forward(self, x):
        x = self.act(self.fc(x))
        for fc in self.linears:
            x = self.act(fc(x))
        x = self.fc2(x)
        return x
      
net = MLP(3072, 10, 100, 4, 'relu')

## Define Experiment

In [5]:
def experiment(args):
  
    net = MLP(args.in_dim, args.out_dim, args.hid_dim, args.n_layer, args.act)
    
    
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Assuming that we are on a CUDA machine, this should print a CUDA device:

    print(device)
    net.to(device)
    #net.cuda()
    #print(net)
    
    list_epoch = [] 
    list_train_loss = []
    list_val_loss = []
    list_acc = []
    list_acc_epoch = []

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.mm)
    
    for epoch in range(args.epoch):  # loop over the dataset multiple times
    
        # ==== Train ===== #
        net.train()
        optimizer.zero_grad()
        
        running_loss = 0.0
        train_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            # get the inputs
            inputs, labels = data
            inputs = inputs.view(-1, 3072)
            
            #inputs = inputs.cuda()
            #labels = labels.cuda()
            inputs, labels = inputs.to(device), labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            train_loss += loss.item()
            
            list_epoch.append(i)
            list_train_loss.append(train_loss)
            if i % 2000 == 1999:    # print every 2000 mini-batches
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0
                

        # ==== Validation ====== #
        net.eval()
        optimizer.zero_grad()
        
        correct = 0
        total = 0
        val_loss = 0 ########
        with torch.no_grad():
            for data in valloader:
                images, labels = data
                images = images.view(-1, 3072)
                
                ################################
                #images = images.cuda()
                #labels = labels.cuda()
                images, labels = inputs.to(device), labels.to(device)
                
                outputs = net(images)

                loss = criterion(outputs, labels)
                val_loss += loss.item()
                
                list_val_loss.append(val_loss)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

            val_loss = val_loss / len(valloader)
            val_acc = 100 * correct / total
            
        print('Epoch {}, Train Loss: {}, Val Loss: {}, Val Acc: {}'.format(epoch, train_loss, val_loss, val_acc ))


    # ===== Evaluation ===== #
    net.eval()
    optimizer.zero_grad()
    
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images = images.view(-1, 3072)
            
            images, labels = inputs.to(device), labels.to(device)
            #images = images.cuda()
            #labels = labels.cuda()

            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        test_acc = 100 * correct / total
            
    return train_loss, val_loss, val_acc, test_acc
    

## Experiment

In [None]:
seed = 123
np.random.seed(seed)
torch.manual_seed(seed)

parser = argparse.ArgumentParser()
args = parser.parse_args("")


args.n_layer = 5
args.in_dim = 3072
args.out_dim = 10
args.hid_dim = 100
args.act = 'relu'

args.lr = 0.001
args.mm = 0.9
args.epoch = 2


list_var1 = [4, 5, 6]        #hidden layer 수
list_var2 = [50, 100, 150]    #dimension

for var1 in list_var1:
    for var2 in list_var2:
        args.n_layer = var1
        args.hid_dim = var2
        result = experiment(args)
        print(result)

print("Train Finished")

cuda:0
[1,  2000] loss: 2.188
[1,  4000] loss: 1.889
[1,  6000] loss: 1.768
[1,  8000] loss: 1.721
[1, 10000] loss: 1.673
Epoch 0, Train Loss: 18477.4324080348, Val Loss: 3.0475153408050537, Val Acc: 10.29
[2,  2000] loss: 1.632
[2,  4000] loss: 1.607
[2,  6000] loss: 1.600
[2,  8000] loss: 1.574
[2, 10000] loss: 1.557
Epoch 1, Train Loss: 15938.930830597878, Val Loss: 2.8248177763938904, Val Acc: 9.73
(15938.930830597878, 2.8248177763938904, 9.73, 10.03)
cuda:0
[1,  2000] loss: 2.224
[1,  4000] loss: 1.917
[1,  6000] loss: 1.788
[1,  8000] loss: 1.681
[1, 10000] loss: 1.654
Epoch 0, Train Loss: 18530.6870226264, Val Loss: 3.595699433517456, Val Acc: 10.52
[2,  2000] loss: 1.602
[2,  4000] loss: 1.586
[2,  6000] loss: 1.570
[2,  8000] loss: 1.547
[2, 10000] loss: 1.517
Epoch 1, Train Loss: 15643.767507374287, Val Loss: 3.0481335682153703, Val Acc: 10.29
(15643.767507374287, 3.0481335682153703, 10.29, 10.07)
cuda:0
[1,  2000] loss: 2.178
[1,  4000] loss: 1.886
[1,  6000] loss: 1.750
[1,

# Report

In [None]:
fig = plt.figure(figsize=(15,5))

# ====== Loss Fluctuation ====== #
ax1 = fig.add_subplot(1, 2, 1)
ax1.plot(list_epoch, list_train_loss, label='train_loss')
ax1.plot(list_epoch, list_val_loss, '--', label='val_loss')
ax1.set_xlabel('epoch')
ax1.set_ylabel('loss')
ax1.grid()
ax1.legend()
ax1.set_title('epoch vs loss')



# 질문있습니다~


사실.. 과제 코드 제대로 못해봤는데요

올려주신 시작코드 training이 너무 오래걸려서 ㅜㅜ기다리다가 끝까지 못해봤어요

일단 시작코드부터 돌려보고 hyperparameter값들 변경해가면서 accuracy 올리고 그래프도 그려보고싶었는데..

너무 느려서? GPU로 제대로 돌고있나?라는 생각이 들었습니당..

내일봐요~ ㅜㅜ