In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# 1. Module Importing 

In [2]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
import torchvision.datasets as dset
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

# 2. Hyperparameter

In [3]:
batch_size = 256
learning_rate = 0.0002
num_epoch = 10

# 3. Data 

In [4]:
mnist_train = dset.MNIST("./", train=True, 
                         transform=transforms.Compose([
                             transforms.Resize(34),                             # 원래 28x28인 이미지를 34x34로 늘립니다.
                             transforms.CenterCrop(28),                         # 중앙 28x28만을 뽑아냅니다.
                             transforms.RandomHorizontalFlip(),                 # 랜덤하게 좌우반전 합니다.
                             transforms.Lambda(lambda x: x.rotate(90)),         # 람다함수를 이용해 90도 회전해줍니다.
                             transforms.ToTensor(),                             # 이미지를 텐서로 변형합니다.(데이터를 0에서 255까지 있는 값을 0에서 1사이 값으로 변환)
                             transforms.Normalize(mean=(0.1307,), std=(0.3081,)) # 정규화입니다. (평균/255=0.1307,표준편차/255=0.3081)
                         ]),
                         target_transform=None, 
                         download=True)
mnist_test = dset.MNIST("./", train=False, 
                        transform=transforms.Compose([
                             transforms.ToTensor(),
                             transforms.Normalize(mean=(0.1307,), std=(0.3081,))
                        ]),
                        target_transform=None, 
                        download=True)

In [5]:
print(mnist_train.__getitem__(0)[0].size(), mnist_train.__len__())
mnist_test.__getitem__(0)[0].size(), mnist_test.__len__()

torch.Size([1, 28, 28]) 60000


(torch.Size([1, 28, 28]), 10000)

In [6]:
train_loader = torch.utils.data.DataLoader(mnist_train,batch_size=batch_size, shuffle=True,num_workers=2,drop_last=True)
test_loader = torch.utils.data.DataLoader(mnist_test,batch_size=batch_size, shuffle=False,num_workers=2,drop_last=True)

# 4. Model  

In [7]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN,self).__init__()
        self.layer = nn.Sequential(
            nn.Conv2d(1,16,3,padding=1),  # 1,28,28 
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.Dropout2d(0.2),
            nn.Conv2d(16,32,3,padding=1), # 28 x 28
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Dropout2d(0.2),
            nn.MaxPool2d(2,2),            # 14 x 14
            nn.Conv2d(32,64,3,padding=1), # 14 x 14
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Dropout2d(0.2),
            nn.MaxPool2d(2,2)             #  7 x 7
        )
        self.fc_layer = nn.Sequential(
            nn.Linear(64*7*7,100),
            nn.BatchNorm1d(100),
            nn.ReLU(),
            nn.Linear(100,10)
        )
        #Initialization. 
      # 초기화 하는 방법
        for m in self.modules():
          if isinstance(m, nn.Conv2d):
            init.kaiming_normal_(m.weight.data)
            m.bias.data.fill_(0)
          elif isinstance(m, nn.Linear): 
            init.kaiming_normal_(m.weight.data)
            m.bias.data.fill_(0)
            
    def forward(self,x):
        out = self.layer(x)
        out = out.view(-1,64*7*7)
        out = self.fc_layer(out)
        return out

In [8]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

model = CNN().to(device)
loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate) #weight_decay=0.01, L2정규화의 lambda=weight_decay
        

cuda:0


In [9]:
from torchsummary import summary
summary(model,(1,28,28), batch_size=256)   

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [256, 16, 28, 28]             160
       BatchNorm2d-2          [256, 16, 28, 28]              32
              ReLU-3          [256, 16, 28, 28]               0
         Dropout2d-4          [256, 16, 28, 28]               0
            Conv2d-5          [256, 32, 28, 28]           4,640
       BatchNorm2d-6          [256, 32, 28, 28]              64
              ReLU-7          [256, 32, 28, 28]               0
         Dropout2d-8          [256, 32, 28, 28]               0
         MaxPool2d-9          [256, 32, 14, 14]               0
           Conv2d-10          [256, 64, 14, 14]          18,496
      BatchNorm2d-11          [256, 64, 14, 14]             128
             ReLU-12          [256, 64, 14, 14]               0
        Dropout2d-13          [256, 64, 14, 14]               0
        MaxPool2d-14            [256, 6

# 6.Train


In [10]:
# L1,L2정규화 직접 정의해보기 : 실제 사용할 때는 L2정규화는 optimizer의 weight_decay사용하면 되고, L2 정규화는 train시 for문 안에 넣어줘야 합니다. 

# all_parameters = torch.cat([x.view(-1) for x in model.parameters()]) 
# #특정 부분에 제약을 걸고싶으면 
# #all_parameters = torch.cat([x.view(-1) for x in model.layer.parameters()])  이렇게 바꿔주시면 됩니다. 
# lamda1=0.05
# l1_regularization = lambda1 * torch.norm(all_parameters, 1)
# l2_regularization = lambda1 * torch.norm(all_parameters, 2)

# loss = loss_func + l1_regularization 
# loss = loss_func + l2_regularization 

## 6-1 L1 정규화하여 train 

In [11]:

for i in range(num_epoch):
    for j,[image,label] in enumerate(train_loader):
        x = image.to(device)
        y_= label.to(device)
        
        optimizer.zero_grad()
        output = model.forward(x)
        all_parameters = torch.cat([x.view(-1) for x in model.parameters()])
        lambda1=0.05 
        l1_regularization = lambda1 * torch.norm(all_parameters, 1)
        loss = loss_func(output,y_)+ l1_regularization 
        loss.backward()
        optimizer.step()
        
    if i % 10 == 0:
      print(loss) 

tensor(381.9536, device='cuda:0', grad_fn=<AddBackward0>)


## 7-1.Test

In [12]:
correct = 0
total = 0
model.eval()
with torch.no_grad():
  for image,label in test_loader:
      x = image.to(device)
      y_= label.to(device)

      output = model.forward(x)
      _,output_index = torch.max(output,1)

      total += label.size(0)
      correct += (output_index == y_).sum().float()

  print("Accuracy of Test Data: {}".format(100*correct/total))

Accuracy of Test Data: 12.960737228393555


# 6-2 Train L2 정규화하여 train 

In [13]:
for i in range(num_epoch):
    for j,[image,label] in enumerate(train_loader):
        x = image.to(device)
        y_= label.to(device)
        
        optimizer.zero_grad()
        output = model.forward(x)
        all_parameters = torch.cat([x.view(-1) for x in model.parameters()])
        lambda1=0.05 
        l2_regularization = lambda1 * torch.norm(all_parameters, 2)
        loss = loss_func(output,y_)+ l2_regularization 
        loss.backward()
        optimizer.step()

    print(loss) 

tensor(1.8040, device='cuda:0', grad_fn=<AddBackward0>)
tensor(1.6171, device='cuda:0', grad_fn=<AddBackward0>)
tensor(1.6204, device='cuda:0', grad_fn=<AddBackward0>)
tensor(1.5041, device='cuda:0', grad_fn=<AddBackward0>)
tensor(1.4748, device='cuda:0', grad_fn=<AddBackward0>)
tensor(1.4456, device='cuda:0', grad_fn=<AddBackward0>)
tensor(1.5023, device='cuda:0', grad_fn=<AddBackward0>)
tensor(1.4482, device='cuda:0', grad_fn=<AddBackward0>)
tensor(1.4266, device='cuda:0', grad_fn=<AddBackward0>)
tensor(1.3644, device='cuda:0', grad_fn=<AddBackward0>)


## 7-2.Test

In [14]:
correct = 0
total = 0
model.eval()
with torch.no_grad():
  for image,label in test_loader:
      x = image.to(device)
      y_= label.to(device)

      output = model.forward(x)
      _,output_index = torch.max(output,1)

      total += label.size(0)
      correct += (output_index == y_).sum().float()

  print("Accuracy of Test Data: {}".format(100*correct/total))

Accuracy of Test Data: 15.48477554321289
