In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

import torch.nn as nn
import torch.optim as optim

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
# 데이터 불러오기 및 전처리 작업
transform = transforms.Compose(
    [transforms.RandomCrop(32, padding=4),
     transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

In [4]:
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)

trainloader = torch.utils.data.DataLoader(trainset, batch_size=16, shuffle=True) 

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=16,shuffle=False)

# Class
#'plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'

Files already downloaded and verified
Files already downloaded and verified


In [5]:
# ResNet18 불러오기 
# pretrained=True를 하면 ResNet18 구조와 사전 학습 된 파라메타를 모두 불러온다.
# pretrained=False를 하면 ResNet18 구조만 불러온다.
# 모델과 텐서에 .to(device)를 붙여야만 GPU 연산이 가능하니 꼭 기입한다.

model = torchvision.models.resnet18(pretrained=True)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to C:\Users\user/.cache\torch\hub\checkpoints\resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:03<00:00, 11.9MB/s]


In [59]:
from torchsummary import summary

In [47]:
temp = next(iter(trainloader))
temp[0].shape, temp[0][0].shape

(torch.Size([16, 3, 32, 32]), torch.Size([3, 32, 32]))

In [45]:
summary(model.to(device), input_size=(3,224,224))

# RuntimeError: 
# Input type (torch.cuda.FloatTensor) and 
# weight type (torch.FloatTensor) should be the same

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]          36,864
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
            Conv2d-8           [-1, 64, 56, 56]          36,864
       BatchNorm2d-9           [-1, 64, 56, 56]             128
             ReLU-10           [-1, 64, 56, 56]               0
       BasicBlock-11           [-1, 64, 56, 56]               0
           Conv2d-12           [-1, 64, 56, 56]          36,864
      BatchNorm2d-13           [-1, 64, 56, 56]             128
             ReLU-14           [-1, 64,

In [48]:
from torchinfo import summary

In [21]:
summary(model)

Layer (type:depth-idx)                   Param #
ResNet                                   --
├─Conv2d: 1-1                            9,408
├─BatchNorm2d: 1-2                       128
├─ReLU: 1-3                              --
├─MaxPool2d: 1-4                         --
├─Sequential: 1-5                        --
│    └─BasicBlock: 2-1                   --
│    │    └─Conv2d: 3-1                  36,864
│    │    └─BatchNorm2d: 3-2             128
│    │    └─ReLU: 3-3                    --
│    │    └─Conv2d: 3-4                  36,864
│    │    └─BatchNorm2d: 3-5             128
│    └─BasicBlock: 2-2                   --
│    │    └─Conv2d: 3-6                  36,864
│    │    └─BatchNorm2d: 3-7             128
│    │    └─ReLU: 3-8                    --
│    │    └─Conv2d: 3-9                  36,864
│    │    └─BatchNorm2d: 3-10            128
├─Sequential: 1-6                        --
│    └─BasicBlock: 2-3                   --
│    │    └─Conv2d: 3-11                 73,728

In [51]:
model.conv1

Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)

In [52]:
model.fc

Linear(in_features=512, out_features=1000, bias=True)

In [53]:
# 모델의 구조를 보면 마지막 출력 노드가 1000개라는 것을 알 수 있다. 
# 이는 1000개의 클래스를 가진 ImageNet 데이터를 이용하여 사전학습 된 모델이기 때문이다. 
# 따라서 우리가 사용하는 CIFAR10 데이터에 맞게 출력층의 노드를 10개로 변경해야만 한다.

model.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 10)
model = model.to(device)

In [54]:
model.conv1

Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))

In [55]:
model.fc

Linear(in_features=512, out_features=10, bias=True)

In [56]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-2)

In [57]:
from tqdm import tqdm

In [58]:
for epoch in tqdm(range(10)):

    running_loss = 0.0
    for data in trainloader:
        
        inputs, labels = data[0].to(device), data[1].to(device)
          
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    cost = running_loss / len(trainloader)        
    print('[%d] loss: %.3f' %(epoch + 1, cost))  

torch.save(model.state_dict(), './models/cifar10_resnet18.pth')      

print('Finished Training')

 10%|█         | 1/10 [01:46<16:00, 106.73s/it]

[1] loss: 1.233


 20%|██        | 2/10 [03:49<15:30, 116.32s/it]

[2] loss: 0.847


 30%|███       | 3/10 [06:21<15:27, 132.51s/it]

[3] loss: 0.751


 40%|████      | 4/10 [09:21<15:07, 151.17s/it]

[4] loss: 0.690


 50%|█████     | 5/10 [13:20<15:15, 183.05s/it]

[5] loss: 0.639


 60%|██████    | 6/10 [20:44<18:05, 271.48s/it]

[6] loss: 0.621


 60%|██████    | 6/10 [20:50<13:53, 208.47s/it]


KeyboardInterrupt: 

In [60]:
model = torchvision.models.resnet18(pretrained=False)



In [61]:
summary(model.to(device), input_size=(3,224,224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
       BatchNorm2d-2         [-1, 64, 112, 112]             128
              ReLU-3         [-1, 64, 112, 112]               0
         MaxPool2d-4           [-1, 64, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]          36,864
       BatchNorm2d-6           [-1, 64, 56, 56]             128
              ReLU-7           [-1, 64, 56, 56]               0
            Conv2d-8           [-1, 64, 56, 56]          36,864
       BatchNorm2d-9           [-1, 64, 56, 56]             128
             ReLU-10           [-1, 64, 56, 56]               0
       BasicBlock-11           [-1, 64, 56, 56]               0
           Conv2d-12           [-1, 64, 56, 56]          36,864
      BatchNorm2d-13           [-1, 64, 56, 56]             128
             ReLU-14           [-1, 64,

In [63]:
for i, (name, param) in enumerate(model.named_parameters()):  
    print(i,name)

0 conv1.weight
1 bn1.weight
2 bn1.bias
3 layer1.0.conv1.weight
4 layer1.0.bn1.weight
5 layer1.0.bn1.bias
6 layer1.0.conv2.weight
7 layer1.0.bn2.weight
8 layer1.0.bn2.bias
9 layer1.1.conv1.weight
10 layer1.1.bn1.weight
11 layer1.1.bn1.bias
12 layer1.1.conv2.weight
13 layer1.1.bn2.weight
14 layer1.1.bn2.bias
15 layer2.0.conv1.weight
16 layer2.0.bn1.weight
17 layer2.0.bn1.bias
18 layer2.0.conv2.weight
19 layer2.0.bn2.weight
20 layer2.0.bn2.bias
21 layer2.0.downsample.0.weight
22 layer2.0.downsample.1.weight
23 layer2.0.downsample.1.bias
24 layer2.1.conv1.weight
25 layer2.1.bn1.weight
26 layer2.1.bn1.bias
27 layer2.1.conv2.weight
28 layer2.1.bn2.weight
29 layer2.1.bn2.bias
30 layer3.0.conv1.weight
31 layer3.0.bn1.weight
32 layer3.0.bn1.bias
33 layer3.0.conv2.weight
34 layer3.0.bn2.weight
35 layer3.0.bn2.bias
36 layer3.0.downsample.0.weight
37 layer3.0.downsample.1.weight
38 layer3.0.downsample.1.bias
39 layer3.1.conv1.weight
40 layer3.1.bn1.weight
41 layer3.1.bn1.bias
42 layer3.1.conv2.wei

In [68]:
param.shape, param[:5]

(torch.Size([1000]),
 tensor([ 0.0041, -0.0438, -0.0341, -0.0435,  0.0330], device='cuda:0',
        grad_fn=<SliceBackward0>))

In [69]:
param.requires_grad

True

In [71]:
model.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 10)
model = model.to(device)
# model.load_state_dict(torch.load('./models/cifar10_resnet18.pth'))

In [72]:
correct = 0
total = 0
with torch.no_grad():
    model.eval()
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))

Accuracy of the network on the 10000 test images: 11 %


In [109]:
# requires_grad 확인
f_list = [0]
c_list = [1]
for i in f_list:
    print(model.layer1[i].bn1.weight.requires_grad)
    print(model.layer1[i].bn1.bias.requires_grad)
print('-' * 27)    
for j in c_list:
    print(model.layer1[j].bn1.weight.requires_grad)
    print(model.layer1[j].bn1.bias.requires_grad)


True
True
---------------------------
True
True


In [75]:
model.layer1

Sequential(
  (0): BasicBlock(
    (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (1): BasicBlock(
    (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
)

In [100]:
model.layer1[0].bn1.weight

Parameter containing:
tensor([0.3090, 0.2147, 0.2366, 0.4259, 0.5137, 0.2181, 0.2204, 0.2300, 0.2640,
        0.2695, 0.2138, 0.4602, 0.2661, 0.2319, 0.3900, 0.2389, 0.2660, 0.3634,
        0.3474, 0.2477, 0.3285, 0.5349, 0.6440, 0.2275, 0.4482, 0.3078, 0.2604,
        0.4651, 0.2179, 0.2858, 0.3426, 0.4420, 0.4450, 0.4500, 0.5516, 0.5092,
        0.2564, 0.2634, 0.5664, 0.6410, 0.2228, 0.1986, 0.2460, 0.2242, 0.2143,
        0.1982, 0.6368, 0.3106, 0.5049, 0.2403, 0.3065, 0.3760, 0.3794, 0.4281,
        0.2991, 0.3326, 0.2596, 0.3345, 0.2006, 0.4351, 0.1683, 0.5149, 0.2629,
        0.3254], requires_grad=True)

In [99]:
model.layer1[0].bn1.bias

Parameter containing:
tensor([ 0.1657,  0.2420,  0.1780, -0.0431, -0.2053,  0.1598,  0.2929,  0.0912,
         0.1116,  0.0884,  0.1104, -0.2035,  0.1539,  0.0857, -0.1094,  0.0654,
         0.0766, -0.2067, -0.0212,  0.1396,  0.0401, -0.2827, -0.3257, -0.0035,
        -0.4373, -0.1248,  0.1282, -0.0874,  0.1199, -0.0829, -0.5315, -0.0780,
        -0.3876, -0.0547, -0.1816, -0.1888,  0.1320,  0.0031, -0.2697, -0.2984,
         0.1394,  0.2597,  0.1372,  0.0053,  0.0132,  0.3295, -0.2715, -0.0187,
        -0.2467,  0.1579,  0.0165, -0.0890, -0.1903, -0.0787,  0.1700, -0.4832,
         0.0619, -0.0677,  0.3125, -0.5064,  0.3138, -0.2617, -0.1545,  0.0063],
       requires_grad=True)

In [102]:
model.layer1[0].bn1.bias.requires_grad

True

In [110]:
for i, (name, param) in enumerate(model.named_parameters()):
    param.requires_grad = False
    if i == 9:
        print('end')
        break

end


In [111]:
f_list = [0]
c_list = [1]
for i in f_list:
    print(model.layer1[i].bn1.weight.requires_grad)
    print(model.layer1[i].bn1.bias.requires_grad)
print('-' * 27)    
for j in c_list:
    print(model.layer1[j].bn1.weight.requires_grad)
    print(model.layer1[j].bn1.bias.requires_grad)

False
False
---------------------------
True
True
