In [1]:
import torch
import torch.nn as nn

torch.manual_seed(1)

<torch._C.Generator at 0x7a11c2d1f6f0>

In [2]:
if torch.cuda.is_available(): device=torch.device('cuda')
print(device)
# colab 환경 사용해서 runtime 유형 변경
#Make sure that your runtime must be 'GPU'
#Print out CUDA clearly
#If not, this task must be really time-consuming

cuda


In [3]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms

#This line is for transformation of Image. You don't need to consider about it.
transform=transforms.Compose([transforms.ToTensor(),
                              transforms.Normalize((0.5),(0.5))])

#Train data download
training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=transform
)

#Test data download
test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=transform
)

#Make dataloader for iteration when training
#would be covered on next toy project
train_dataloader = DataLoader(training_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to data/FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 26421880/26421880 [00:02<00:00, 10263295.97it/s]


Extracting data/FashionMNIST/raw/train-images-idx3-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29515/29515 [00:00<00:00, 170011.25it/s]


Extracting data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 4422102/4422102 [00:01<00:00, 3016214.35it/s]


Extracting data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 5148/5148 [00:00<00:00, 6118525.64it/s]

Extracting data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw






In [4]:
#nn.Sequential

class Customnet1(nn.Module):
  """Define your network by using nn.Sequential. Primarily, this network should be inherited by nn.Module from torch.nn
     There are two functions which you have to implement, 1) __init__ & 2) forward

     In __init__, you have to pre-define your layers with dimension. For example, you have to decide the kernel size of Convolution layer, or the size of weight of Affine_forward.

     In forward, you should pass your input through your network. You can use the network, which is pre-defined in __init__. Or make some creative operations which couldn't be implemented in __init__.


  """
  def __init__(self):
    #Intialize the nn.Module firstly
    super().__init__()

    #You can contain your layers here
    #nn.conv2d(input_channel, output_channel, kernel_size, stride, padding, dilation, etc,,,)
    #nn.MaxPool2d(kernel_size, stride, etc,,)
    #The details could be searched in pytorch official docs
    self.layer=nn.Sequential(
                             nn.Conv2d(1,64,(2,2)), #You can control Kernel size, using the tuple like (3,3) or (3,2). The kernel must not be a square.
                             nn.MaxPool2d(4), #This kernel size must also be able to become non-symmetric.
                             nn.ReLU(),
                             nn.Conv2d(64,64,(2,2)),
                             nn.MaxPool2d(4),
                             nn.ReLU(),
                             nn.Flatten(),
                             nn.Linear(64, 10), # In_channel, Output Channel
                             nn.Softmax())
  #As you can see, nn.Sequential is a very comfortable function because that method wraps the whole layers! Just passing through the layer makes us design the network.
  #This

  #forwad has two variables: (self, x)
  # x is the 'input' which is passed from the DataLoader
  # This process is fully automatic so that you don't need to consider any other things.
  def forward(self, x):

    x=self.layer(x)

    return x

In [5]:
# line-by-line
# Not using the nn.Sequential, you can define whole layers line-by-line

class Customnet2(nn.Module):
  """The main principle is same with above.

  """
  def __init__(self):

    super().__init__()

    #Define whole layers individually
    self.conv1=nn.Conv2d(1,64,(2,2))
    self.conv2=nn.Conv2d(64,64,(2,2))
    self.maxpool=nn.MaxPool2d(4)
    self.relu=nn.ReLU()
    self.flatten=nn.Flatten()
    self.linear=nn.Linear(64,10)
    self.softmax=nn.Softmax()

  def forward(self, x):
    #forward should be complicated compared to nn.Sequential's one
    #You can add some operations which are not included in nn.Module
    #For example, if you want to print out the shape of x, you can add the line in this Class.
    #But regretfully, since nn.Sequential does not contain the print method,  you cannot print out the shape of x if you use the nn.Sequential

    x=self.conv1(x)
    x=self.maxpool(x)
    x=self.relu(x)
    x=self.conv2(x)
    x=self.maxpool(x)
    x=self.relu(x)

    x=self.flatten(x)
    x=self.linear(x)
    x=self.softmax(x)

    return x

In [6]:
#model
model=Customnet1().to(device) #You can load your model on GPU!

#learning rate
learning_rate=1e-4

#epoch
epoch=10

#Loss function
loss=nn.CrossEntropyLoss()

#Optimizer
optimizer=torch.optim.Adam(model.parameters(), lr=learning_rate)

In [7]:
#Same with pytorch project 1
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):

        pred = model(X.to(device)) #Also load the input on GPU
        loss = loss_fn(pred.cpu(), y) #Detach your predicted result on CPU #Loss could be calculated on CPU


        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

In [8]:
#Same with pytorch project 1
def test_loop(dataloader, model, loss_fn):

    model.eval() #test mode
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0



    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X.to(device))
            test_loss += loss_fn(pred.cpu(), y).item()
            correct += (pred.cpu().argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [9]:
for t in range(epoch):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss, optimizer)
    test_loop(test_dataloader, model, loss)
print("Done!")
# 문제에서 주어진 조건으로 customnet을 훈련시켰을 때
# 첫 accuarcy는 56.6% avg loss는 1.958에서 마지막 epoch에서는 69.7% 1.73까지 상승함

Epoch 1
-------------------------------


  input = module(input)


loss: 2.303776  [   64/60000]
loss: 2.296282  [ 6464/60000]
loss: 2.255524  [12864/60000]
loss: 2.214055  [19264/60000]
loss: 2.126472  [25664/60000]
loss: 2.128938  [32064/60000]
loss: 2.067143  [38464/60000]
loss: 2.021931  [44864/60000]
loss: 2.091132  [51264/60000]
loss: 1.997440  [57664/60000]
Test Error: 
 Accuracy: 56.6%, Avg loss: 1.957711 

Epoch 2
-------------------------------
loss: 1.977227  [   64/60000]
loss: 1.989034  [ 6464/60000]
loss: 1.933161  [12864/60000]
loss: 1.971962  [19264/60000]
loss: 1.859908  [25664/60000]
loss: 1.889012  [32064/60000]
loss: 1.885171  [38464/60000]
loss: 1.863105  [44864/60000]
loss: 1.931976  [51264/60000]
loss: 1.846494  [57664/60000]
Test Error: 
 Accuracy: 63.3%, Avg loss: 1.854669 

Epoch 3
-------------------------------
loss: 1.848866  [   64/60000]
loss: 1.937832  [ 6464/60000]
loss: 1.850482  [12864/60000]
loss: 1.896395  [19264/60000]
loss: 1.799412  [25664/60000]
loss: 1.836098  [32064/60000]
loss: 1.836629  [38464/60000]
loss: 

In [10]:
#customnet1 model2 - learning rate 감소
model=Customnet1().to(device) #You can load your model on GPU!

#learning rate
learning_rate=1e-8

#epoch
epoch=10

#Loss function
loss=nn.CrossEntropyLoss()

#Optimizer
optimizer=torch.optim.Adam(model.parameters(), lr=learning_rate)

In [13]:
for t in range(epoch):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss, optimizer)
    test_loop(test_dataloader, model, loss)
print("Done!")
# learning rate를 e^-8로 변경 및 train test코드 다시 실행
# 첫 accuarcy는 10.0%로 매우 낮고 avg loss는 2,303으로 더 크다
#  epoch가 증가해도 accuarcy와 avg loss가 거의 그대로인 걸 확인할 수 있음

Epoch 1
-------------------------------
loss: 2.306494  [   64/60000]
loss: 2.304280  [ 6464/60000]
loss: 2.304470  [12864/60000]
loss: 2.302670  [19264/60000]
loss: 2.303493  [25664/60000]
loss: 2.305059  [32064/60000]
loss: 2.302422  [38464/60000]
loss: 2.304759  [44864/60000]
loss: 2.306313  [51264/60000]
loss: 2.304266  [57664/60000]
Test Error: 
 Accuracy: 10.0%, Avg loss: 2.303070 

Epoch 2
-------------------------------
loss: 2.306481  [   64/60000]
loss: 2.304270  [ 6464/60000]
loss: 2.304457  [12864/60000]
loss: 2.302658  [19264/60000]
loss: 2.303478  [25664/60000]
loss: 2.305047  [32064/60000]
loss: 2.302408  [38464/60000]
loss: 2.304748  [44864/60000]
loss: 2.306300  [51264/60000]
loss: 2.304250  [57664/60000]
Test Error: 
 Accuracy: 10.0%, Avg loss: 2.303056 

Epoch 3
-------------------------------
loss: 2.306469  [   64/60000]
loss: 2.304260  [ 6464/60000]
loss: 2.304445  [12864/60000]
loss: 2.302644  [19264/60000]
loss: 2.303465  [25664/60000]
loss: 2.305036  [32064/600

In [14]:
#customnet1 model3 - learning rate 증가
model=Customnet1().to(device) #You can load your model on GPU!

#learning rate
learning_rate=1e-3

#epoch
epoch=10

#Loss function
loss=nn.CrossEntropyLoss()

#Optimizer
optimizer=torch.optim.Adam(model.parameters(), lr=learning_rate)

In [17]:
for t in range(epoch):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss, optimizer)
    test_loop(test_dataloader, model, loss)
print("Done!")
# learning rate를 e^-3로 변경 및 train test코드 다시 실행
# 첫 epoch가 accuarcy 75.7% avg loss도 1.71로 꽤 좋은 값을 나타냄
# epoch가 증가할 때 accuarcy는 조금씩 증가하여 81.1%까지 상승하였으며 avg loss는 1.65에서 더 이상 감소하지 않음
# 추후 분석 시도 이 learning rate 활용

Epoch 1
-------------------------------
loss: 2.304287  [   64/60000]
loss: 2.123026  [ 6464/60000]
loss: 1.865740  [12864/60000]
loss: 1.933408  [19264/60000]
loss: 1.764824  [25664/60000]
loss: 1.719012  [32064/60000]
loss: 1.745906  [38464/60000]
loss: 1.723997  [44864/60000]
loss: 1.742696  [51264/60000]
loss: 1.712629  [57664/60000]
Test Error: 
 Accuracy: 75.7%, Avg loss: 1.714678 

Epoch 2
-------------------------------
loss: 1.667711  [   64/60000]
loss: 1.746578  [ 6464/60000]
loss: 1.650918  [12864/60000]
loss: 1.736564  [19264/60000]
loss: 1.680282  [25664/60000]
loss: 1.702725  [32064/60000]
loss: 1.692104  [38464/60000]
loss: 1.639371  [44864/60000]
loss: 1.671891  [51264/60000]
loss: 1.675994  [57664/60000]
Test Error: 
 Accuracy: 78.2%, Avg loss: 1.683269 

Epoch 3
-------------------------------
loss: 1.634984  [   64/60000]
loss: 1.695371  [ 6464/60000]
loss: 1.625947  [12864/60000]
loss: 1.719544  [19264/60000]
loss: 1.655523  [25664/60000]
loss: 1.689841  [32064/600

In [20]:
#customnet1 model4 - loss function 변경(multi margin)
model=Customnet1().to(device) #You can load your model on GPU!

#learning rate
learning_rate=1e-3

#epoch
epoch=10

#Loss function
loss=nn.MultiMarginLoss()

#Optimizer
optimizer=torch.optim.Adam(model.parameters(), lr=learning_rate)

In [21]:
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):

        pred = model(X.to(device)) #Also load the input on GPU
        loss = loss_fn(pred.cpu(), y) #Detach your predicted result on CPU #Loss could be calculated on CPU


        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

def test_loop(dataloader, model, loss_fn):

    model.eval() #test mode
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0



    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X.to(device))
            test_loss += loss_fn(pred.cpu(), y).item()
            correct += (pred.cpu().argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
for t in range(epoch):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss, optimizer)
    test_loop(test_dataloader, model, loss)
print("Done!")

# loss function을 multimargin으로 변경 후 분석 실시
# accuarcy는 이전 case와 비슷한데 avg loss가 초기값부터 0.294로 굉장히 낮음
# avg loss는 낮지만 최종 epoch에서의 accuarcy는 77% 정도로 오히려 낮음


Epoch 1
-------------------------------
loss: 0.898860  [   64/60000]
loss: 0.651965  [ 6464/60000]
loss: 0.467618  [12864/60000]
loss: 0.450539  [19264/60000]
loss: 0.310964  [25664/60000]
loss: 0.339141  [32064/60000]
loss: 0.281062  [38464/60000]
loss: 0.231473  [44864/60000]
loss: 0.296274  [51264/60000]
loss: 0.331300  [57664/60000]
Test Error: 
 Accuracy: 72.2%, Avg loss: 0.293695 

Epoch 2
-------------------------------
loss: 0.232056  [   64/60000]
loss: 0.269252  [ 6464/60000]
loss: 0.235746  [12864/60000]
loss: 0.328808  [19264/60000]
loss: 0.238930  [25664/60000]
loss: 0.340291  [32064/60000]
loss: 0.234024  [38464/60000]
loss: 0.186111  [44864/60000]
loss: 0.224922  [51264/60000]
loss: 0.297970  [57664/60000]
Test Error: 
 Accuracy: 75.1%, Avg loss: 0.259880 

Epoch 3
-------------------------------
loss: 0.186246  [   64/60000]
loss: 0.249681  [ 6464/60000]
loss: 0.219208  [12864/60000]
loss: 0.325037  [19264/60000]
loss: 0.221319  [25664/60000]
loss: 0.327434  [32064/600

In [31]:
#customnet1 model5 - optimizer 변경(Adagard)
model=Customnet1().to(device) #You can load your model on GPU!

#learning rate
learning_rate=1e-3

#epoch
epoch=10

#Loss function
loss=nn.CrossEntropyLoss()

#Optimizer
optimizer =torch.optim.Adagrad(model.parameters(), lr=learning_rate, weight_decay=0.01)


In [32]:
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):

        pred = model(X.to(device)) #Also load the input on GPU
        loss = loss_fn(pred.cpu(), y) #Detach your predicted result on CPU #Loss could be calculated on CPU


        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

def test_loop(dataloader, model, loss_fn):

    model.eval() #test mode
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0



    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X.to(device))
            test_loss += loss_fn(pred.cpu(), y).item()
            correct += (pred.cpu().argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
for t in range(epoch):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss, optimizer)
    test_loop(test_dataloader, model, loss)
print("Done!")

# optimizer를 adagard로 변경 후 weight deacy는 0.01로 설정 후 학습
# learning rate가 e^-3으로 높은데도 초기 accuarcy가 35.6으로 꽤 낮음 avg loss는 2.19임
# 학습을 해도 accuarcy가 42% 선에서 멈추고 avg loss 또한 2.05정도로 굉장히 천천히 작아짐

Epoch 1
-------------------------------
loss: 2.307628  [   64/60000]


  input = module(input)


loss: 2.290865  [ 6464/60000]
loss: 2.269075  [12864/60000]
loss: 2.250744  [19264/60000]
loss: 2.231396  [25664/60000]
loss: 2.266430  [32064/60000]
loss: 2.218637  [38464/60000]
loss: 2.225161  [44864/60000]
loss: 2.210211  [51264/60000]
loss: 2.188641  [57664/60000]
Test Error: 
 Accuracy: 35.6%, Avg loss: 2.192800 

Epoch 2
-------------------------------
loss: 2.196078  [   64/60000]
loss: 2.205303  [ 6464/60000]
loss: 2.184271  [12864/60000]
loss: 2.177668  [19264/60000]
loss: 2.156433  [25664/60000]
loss: 2.179735  [32064/60000]
loss: 2.146525  [38464/60000]
loss: 2.146695  [44864/60000]
loss: 2.159908  [51264/60000]
loss: 2.102440  [57664/60000]
Test Error: 
 Accuracy: 41.5%, Avg loss: 2.130341 

Epoch 3
-------------------------------
loss: 2.150289  [   64/60000]
loss: 2.151695  [ 6464/60000]
loss: 2.133396  [12864/60000]
loss: 2.140352  [19264/60000]
loss: 2.104897  [25664/60000]
loss: 2.126768  [32064/60000]
loss: 2.109979  [38464/60000]
loss: 2.106456  [44864/60000]
loss: 

In [33]:
#customnet1 model6 - optimizer 변경(RMSprop)
model=Customnet1().to(device) #You can load your model on GPU!

#learning rate
learning_rate=1e-3

#epoch
epoch=10

#Loss function
loss=nn.CrossEntropyLoss()

#Optimizer
optimizer =torch.optim.RMSprop(model.parameters(), lr=learning_rate, alpha=0.9)

In [34]:
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):

        pred = model(X.to(device)) #Also load the input on GPU
        loss = loss_fn(pred.cpu(), y) #Detach your predicted result on CPU #Loss could be calculated on CPU


        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

def test_loop(dataloader, model, loss_fn):

    model.eval() #test mode
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0



    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X.to(device))
            test_loss += loss_fn(pred.cpu(), y).item()
            correct += (pred.cpu().argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
for t in range(epoch):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss, optimizer)
    test_loop(test_dataloader, model, loss)
print("Done!")
# RMSprop으로 변경 시 첫 accuaracy가 75.4% avg loss가 1.716으로 굉장히 낮음
# 초기 예상은 ADAM이 RMSprop에 비해 accuarcy가 많이 높을 것이라 생각했는데 본 학습에서는 비슷한 성능을 보임

Epoch 1
-------------------------------
loss: 2.296600  [   64/60000]
loss: 2.062076  [ 6464/60000]
loss: 1.850274  [12864/60000]
loss: 1.874274  [19264/60000]
loss: 1.811322  [25664/60000]
loss: 1.731295  [32064/60000]
loss: 1.777835  [38464/60000]
loss: 1.704476  [44864/60000]
loss: 1.739373  [51264/60000]
loss: 1.702033  [57664/60000]
Test Error: 
 Accuracy: 75.4%, Avg loss: 1.716103 

Epoch 2
-------------------------------
loss: 1.688532  [   64/60000]
loss: 1.723668  [ 6464/60000]
loss: 1.647938  [12864/60000]
loss: 1.743416  [19264/60000]
loss: 1.661476  [25664/60000]
loss: 1.703732  [32064/60000]
loss: 1.713250  [38464/60000]
loss: 1.645413  [44864/60000]
loss: 1.694991  [51264/60000]
loss: 1.682664  [57664/60000]
Test Error: 
 Accuracy: 77.0%, Avg loss: 1.699213 

Epoch 3
-------------------------------
loss: 1.651886  [   64/60000]
loss: 1.658438  [ 6464/60000]
loss: 1.630722  [12864/60000]
loss: 1.716839  [19264/60000]
loss: 1.649394  [25664/60000]
loss: 1.695916  [32064/600

In [35]:
#customnet1 model7 - optimizer 변경(SGD)
model=Customnet1().to(device) #You can load your model on GPU!

#learning rate
learning_rate=1e-3

#epoch
epoch=10

#Loss function
loss=nn.CrossEntropyLoss()

#Optimizer
optimizer =torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9)

In [36]:
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):

        pred = model(X.to(device)) #Also load the input on GPU
        loss = loss_fn(pred.cpu(), y) #Detach your predicted result on CPU #Loss could be calculated on CPU


        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

def test_loop(dataloader, model, loss_fn):

    model.eval() #test mode
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0



    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X.to(device))
            test_loss += loss_fn(pred.cpu(), y).item()
            correct += (pred.cpu().argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
for t in range(epoch):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss, optimizer)
    test_loop(test_dataloader, model, loss)
print("Done!")
# 또 다른 optimizer인 sgd로 학습할 때는 첫 epoch에서의 accuarcy는 24.5%인데 두 번째 accuarcy는 12%대로 낮아짐
# 다시 accuarcy가 상승하여 마지막 apoch에는 50%를 넘겨 오히려 Adagrad보다 높은 것을 알 수 있음 AVG loss도 점점 작아져 2보다 낮아짐

Epoch 1
-------------------------------
loss: 2.297557  [   64/60000]
loss: 2.298979  [ 6464/60000]
loss: 2.298034  [12864/60000]
loss: 2.301540  [19264/60000]
loss: 2.300325  [25664/60000]
loss: 2.298663  [32064/60000]
loss: 2.297675  [38464/60000]
loss: 2.297965  [44864/60000]
loss: 2.293558  [51264/60000]
loss: 2.297538  [57664/60000]
Test Error: 
 Accuracy: 24.5%, Avg loss: 2.294572 

Epoch 2
-------------------------------
loss: 2.288501  [   64/60000]
loss: 2.292382  [ 6464/60000]
loss: 2.289253  [12864/60000]
loss: 2.293307  [19264/60000]
loss: 2.287500  [25664/60000]
loss: 2.292384  [32064/60000]
loss: 2.286513  [38464/60000]
loss: 2.288982  [44864/60000]
loss: 2.276459  [51264/60000]
loss: 2.289790  [57664/60000]
Test Error: 
 Accuracy: 12.7%, Avg loss: 2.278842 

Epoch 3
-------------------------------
loss: 2.264313  [   64/60000]
loss: 2.275727  [ 6464/60000]
loss: 2.265461  [12864/60000]
loss: 2.274645  [19264/60000]
loss: 2.254979  [25664/60000]
loss: 2.278110  [32064/600

In [37]:
## customnet2는 customnet1과 굉장히 유사하기 때문에 customnet1의 model1 4 6번에 대해서 똑같이 실행
## customnet2 model1
#model
model=Customnet2().to(device) #You can load your model on GPU!

#learning rate
learning_rate=1e-4

#epoch
epoch=10

#Loss function
loss=nn.CrossEntropyLoss()

#Optimizer
optimizer=torch.optim.Adam(model.parameters(), lr=learning_rate)

In [38]:
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):

        pred = model(X.to(device)) #Also load the input on GPU
        loss = loss_fn(pred.cpu(), y) #Detach your predicted result on CPU #Loss could be calculated on CPU


        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

def test_loop(dataloader, model, loss_fn):

    model.eval() #test mode
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0



    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X.to(device))
            test_loss += loss_fn(pred.cpu(), y).item()
            correct += (pred.cpu().argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
for t in range(epoch):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss, optimizer)
    test_loop(test_dataloader, model, loss)
print("Done!")
# customnet1의 경우 첫 accuarcy는 56.6% avg loss는 1.958에서 마지막 epoch에서는 69.7% 1.73까지 상승함
# customnet2에서 학습했을 때 첫 accuarcy는 53.4% avg loss는 1.986으로 어느 정도 유사함
# accuarcy가 비슷할 줄 알았으나 accuarcy가 epoch3에서 70%를 넘기더니 마지막 epoch에서 accuarcy 78% avg loss 1.69까지 변화함
# epoch를 더 늘려봤을 떄 둘의 차이도 확인해보면 좋을 듯함(제 컴퓨터 이슈로 거기까지는 힘드네요...)

Epoch 1
-------------------------------
loss: 2.298865  [   64/60000]


  x=self.softmax(x)


loss: 2.286464  [ 6464/60000]
loss: 2.252849  [12864/60000]
loss: 2.227568  [19264/60000]
loss: 2.153438  [25664/60000]
loss: 2.114349  [32064/60000]
loss: 2.080132  [38464/60000]
loss: 2.031701  [44864/60000]
loss: 2.034065  [51264/60000]
loss: 2.006605  [57664/60000]
Test Error: 
 Accuracy: 53.4%, Avg loss: 1.986317 

Epoch 2
-------------------------------
loss: 1.949072  [   64/60000]
loss: 1.962610  [ 6464/60000]
loss: 1.876100  [12864/60000]
loss: 1.931191  [19264/60000]
loss: 1.877311  [25664/60000]
loss: 1.825539  [32064/60000]
loss: 1.863250  [38464/60000]
loss: 1.828765  [44864/60000]
loss: 1.855518  [51264/60000]
loss: 1.846874  [57664/60000]
Test Error: 
 Accuracy: 68.8%, Avg loss: 1.811678 

Epoch 3
-------------------------------
loss: 1.779472  [   64/60000]
loss: 1.824034  [ 6464/60000]
loss: 1.726490  [12864/60000]
loss: 1.814656  [19264/60000]
loss: 1.797148  [25664/60000]
loss: 1.744746  [32064/60000]
loss: 1.786983  [38464/60000]
loss: 1.777323  [44864/60000]
loss: 

In [39]:
#customnet2 model4 - loss function 변경(multi margin)
model=Customnet2().to(device) #You can load your model on GPU!

#learning rate
learning_rate=1e-3

#epoch
epoch=10

#Loss function
loss=nn.MultiMarginLoss()

#Optimizer
optimizer=torch.optim.Adam(model.parameters(), lr=learning_rate)

In [40]:
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):

        pred = model(X.to(device)) #Also load the input on GPU
        loss = loss_fn(pred.cpu(), y) #Detach your predicted result on CPU #Loss could be calculated on CPU


        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

def test_loop(dataloader, model, loss_fn):

    model.eval() #test mode
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0



    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X.to(device))
            test_loss += loss_fn(pred.cpu(), y).item()
            correct += (pred.cpu().argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
for t in range(epoch):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss, optimizer)
    test_loop(test_dataloader, model, loss)
print("Done!")
# customnet2에서 learing rate e-3, loss function을 multi-margin으로 하였을 때 학습 비교
# 첫 epoch에서 accuracy는 64.9%로 customnet1의 72.2%보다 낮고 avg loss가 0.368로 customnet1보다 높긴 하지만 굉장히 낮음
# avg loss는 0.23정도까지 낮아지고 최종 epoch에서의 accuarcy는 78% 정도로 customnet1에서와 굉장히 유사함을 알 수 있음

Epoch 1
-------------------------------
loss: 0.898772  [   64/60000]


  x=self.softmax(x)


loss: 0.688860  [ 6464/60000]
loss: 0.472018  [12864/60000]
loss: 0.471097  [19264/60000]
loss: 0.328204  [25664/60000]
loss: 0.427985  [32064/60000]
loss: 0.330902  [38464/60000]
loss: 0.327388  [44864/60000]
loss: 0.421901  [51264/60000]
loss: 0.381837  [57664/60000]
Test Error: 
 Accuracy: 64.9%, Avg loss: 0.368470 

Epoch 2
-------------------------------
loss: 0.346029  [   64/60000]
loss: 0.374907  [ 6464/60000]
loss: 0.365523  [12864/60000]
loss: 0.419771  [19264/60000]
loss: 0.274033  [25664/60000]
loss: 0.427115  [32064/60000]
loss: 0.263612  [38464/60000]
loss: 0.290129  [44864/60000]
loss: 0.371680  [51264/60000]
loss: 0.355707  [57664/60000]
Test Error: 
 Accuracy: 67.1%, Avg loss: 0.338069 

Epoch 3
-------------------------------
loss: 0.307622  [   64/60000]
loss: 0.343020  [ 6464/60000]
loss: 0.360501  [12864/60000]
loss: 0.415045  [19264/60000]
loss: 0.239905  [25664/60000]
loss: 0.420004  [32064/60000]
loss: 0.258867  [38464/60000]
loss: 0.293451  [44864/60000]
loss: 

In [41]:
#customnet2 model6 - optimizer 변경(RMSprop)
model=Customnet1().to(device) #You can load your model on GPU!

#learning rate
learning_rate=1e-3

#epoch
epoch=10

#Loss function
loss=nn.CrossEntropyLoss()

#Optimizer
optimizer =torch.optim.RMSprop(model.parameters(), lr=learning_rate, alpha=0.9)

In [42]:
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):

        pred = model(X.to(device)) #Also load the input on GPU
        loss = loss_fn(pred.cpu(), y) #Detach your predicted result on CPU #Loss could be calculated on CPU


        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

def test_loop(dataloader, model, loss_fn):

    model.eval() #test mode
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0



    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X.to(device))
            test_loss += loss_fn(pred.cpu(), y).item()
            correct += (pred.cpu().argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
for t in range(epoch):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss, optimizer)
    test_loop(test_dataloader, model, loss)
print("Done!")
# Customnet2에서 RMSprop으로 변경 시 첫 accuaracy가 76.4% avg loss가 1.707로 customnet1에서와 굉장히 유사함
# 최종 epoch에서도 accuracy 82.4% avg loss 1.63로 customnet1의 82.4% 1.61로 거의 일치함
# 몇몇 예시를 통해 customnet1과 customnet2에서의 학습이 일부 사례를 제외하면 거의 일치하는 것을 알 수 있음

Epoch 1
-------------------------------
loss: 2.300474  [   64/60000]


  input = module(input)


loss: 2.049211  [ 6464/60000]
loss: 1.865687  [12864/60000]
loss: 1.843297  [19264/60000]
loss: 1.758742  [25664/60000]
loss: 1.705935  [32064/60000]
loss: 1.760592  [38464/60000]
loss: 1.699112  [44864/60000]
loss: 1.729528  [51264/60000]
loss: 1.700549  [57664/60000]
Test Error: 
 Accuracy: 76.4%, Avg loss: 1.706823 

Epoch 2
-------------------------------
loss: 1.674795  [   64/60000]
loss: 1.719642  [ 6464/60000]
loss: 1.653118  [12864/60000]
loss: 1.732027  [19264/60000]
loss: 1.649596  [25664/60000]
loss: 1.675847  [32064/60000]
loss: 1.707992  [38464/60000]
loss: 1.635135  [44864/60000]
loss: 1.678058  [51264/60000]
loss: 1.672011  [57664/60000]
Test Error: 
 Accuracy: 79.2%, Avg loss: 1.679669 

Epoch 3
-------------------------------
loss: 1.624713  [   64/60000]
loss: 1.675228  [ 6464/60000]
loss: 1.651397  [12864/60000]
loss: 1.739381  [19264/60000]
loss: 1.640641  [25664/60000]
loss: 1.679266  [32064/60000]
loss: 1.666852  [38464/60000]
loss: 1.624706  [44864/60000]
loss: 

In [43]:
#So, many people use the mixed way like below:

class Customnet3(nn.Module):
  """The main principle is same with above.

  """
  def __init__(self):

    super().__init__()

    #More simpler than line-by-line but still intuitive to revise
    #Making blocks!
    self.conv1=nn.Sequential(nn.Conv2d(1,64,(2,2)),
                             nn.MaxPool(4),
                             nn.ReLU())
    self.conv2=nn.Sequential(nn.Conv2d(64,64,(2,2)),
                             nn.MaxPool(4),
                             nn.ReLU())
    self.flatten=nn.Flatten()
    self.linear=nn.Sequential(nn.Linear(64,10),
                              nn.Softmax())


  def forward(self, x):

    #More simpler than line-by-line forward

    x=self.conv1(x)
    x=self.conv2(x)
    x=self.flatten(x)
    x=self.linear(x)

    return x

In [9]:
import torchvision



transform = transforms.Compose([
    transforms.Resize((227, 227)), #Size is different with original paper, since we have to obtain 55*55 feature maps after 1st convolution layers
                                   #224*224 images do not lead to 55*55 feature maps
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])


#Use CIFAR100, instead of ImageNet Dataset
trainset = torchvision.datasets.CIFAR100('./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

testset = torchvision.datasets.CIFAR100('./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)


Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data/cifar-100-python.tar.gz


100%|██████████| 169001437/169001437 [00:13<00:00, 12476783.54it/s]


Extracting ./data/cifar-100-python.tar.gz to ./data
Files already downloaded and verified


In [13]:
#model
model=Alexnet().to(device)

#learning rate
learning_rate=1e-4

#epoch
epoch=10

#Loss function
loss=nn.CrossEntropyLoss()

#Optimizer
optimizer=torch.optim.Adam(model.parameters(), lr=learning_rate)

NameError: ignored