In [1]:
# 1. dataset (MNIST, My DataSet, ImageFolder(Cat, Dog 각각 데이터가 분류되어있어야 사용가능))
# 2. DataLoader : 배치 단위로 데이터셋을 로드
# 3. model (perceptron -> LeNet5 -> ResNet)

In [50]:
import torch
import torch.nn as nn
from torchvision import datasets, transforms
import torchvision
from torch.utils.data import DataLoader 
import torch.optim as optim
from torchvision.datasets import MNIST, FashionMNIST, CIFAR10
import torchvision.transforms.v2 as v2 # 데이터 전처리 변환(transform) 기능
from torchvision.models import alexnet, AlexNet_Weights, vgg16, VGG16_Weights, resnet50, ResNet50_Weights
# pip install torch-summary
from torchsummary import summary
from torchvision.datasets import ImageFolder
from torchvision.models import list_models
import time

### LeNet5

In [3]:
class LeNet5(nn.Module):

    # --- 이건 생성자
    def __init__(self):
        super(LeNet5, self).__init__()
        self.features = nn.Sequential(nn.Conv2d(1,6,kernel_size=5,padding="same"), # 합성곱층 - 이미지 특징 추출(필터=커널 이용 이미지 스캔)
                                      nn.ReLU(), # 활성화함수 - 비선형성으로 패턴 학습할 수 있도록
                                      nn.MaxPool2d(2), # 풀링층 = 맵크기 줄임 - MaxPool은 그 구역에 가장 큰 값으로
                                      nn.Conv2d(6,16,kernel_size=5,padding="same"),
                                      nn.ReLU(),
                                      nn.MaxPool2d(2),
                                      nn.Conv2d(16,126,kernel_size=5,padding="same"),
                                      nn.ReLU(),
                                      nn.MaxPool2d(2))
        
        self.flatten = nn.Flatten() # 3차원을 1차원으로 바꿈
        
        self.classifier = nn.Sequential(nn.Linear(126*3*3,128), # 완전연결층 - 추출된 특징들을 조합하고 변환
                                        nn.ReLU(), # 활성화함수
                                        nn.Linear(128,64),
                                        nn.ReLU(),
                                        nn.Linear(64,10), # 10: 분류하고자 하는 가지수 
                                        nn.Softmax(dim=1)) # 확률 분포로 변환
        # 1134개->128개->64개->10개->확률 분포

    # --- 이건 메서드인데, 입력 데이터가 모델을 통과하는 과정을 정의 (입력~출력)
    def forward(self,x):
        x = self.features(x)      # 특징 추출 부분 (합성곱 + 풀링)
        x = self.flatten(x)       # 3D -> 1D 변환
        x = self.classifier(x)    # 분류 부분 (완전연결층)
        return x

### MNIST

In [4]:
model = LeNet5()

In [5]:
images = torch.rand(64, 1, 28, 28)

In [6]:
dataset = datasets.MNIST("data", download=True, transform=torchvision.transforms.ToTensor())

In [7]:
data_loader = DataLoader(dataset, batch_size=32)

In [8]:
for X_train, y_label in data_loader:
  print(X_train.shape, y_label.shape)
  break # 첫번째 배치의 (X_train.shape: 입력 데이터 배치의 형태, y_label.shape: 레이블 배치의 형태) 확인

# torch.Size([32, 1, 28, 28]) : 배치크기는 32, 채널수는 1(흑백), 이미지크기는 28*28
# torch.Size([32]) : 32개의 레이블 (32개 이미지 각각에 대한 정답을 나타내는 숫자들)

torch.Size([32, 1, 28, 28]) torch.Size([32])


### Adam optimizer

In [9]:
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
epochs = 1

for _ in range(epochs):
  for X_train, y_label in data_loader:
    optimizer.zero_grad() # 새로운 배치에 대한 기울기 계산 전에 이전값 초기화
    outputs = model(X_train)
    loss = loss_fn(outputs, y_label) # 예측값과 실제 레이블을 비교하여 손실을 계산
    loss.backward() # 미분
    optimizer.step() # 모델의 매개변수를 업데이트

### MNIST, FashionMNIST, CIFAR10

In [10]:
dataset = MNIST('data', download=True, transform=v2.ToTensor())
dataset = FashionMNIST('data', download=True, transform=v2.ToTensor())
dataset = CIFAR10('data', download=True, transform=v2.ToTensor())



Files already downloaded and verified


In [11]:
dataset.data.shape

(50000, 32, 32, 3)

In [12]:
data_loader = DataLoader(dataset, batch_size=64)

In [13]:
50000/64

781.25

### AlexNet

In [14]:
# https://pytorch.org/vision/main/_modules/torchvision/models/alexnet.html#alexnet

In [15]:
model = alexnet(weights=AlexNet_Weights.IMAGENET1K_V1)

In [16]:
model # (avgpool): AdaptiveAvgPool2d(output_size=(6, 6)) : 평균 풀링을 수행 -> 입력을 (6, 6) 크기로 조정

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [17]:
# classifier에서 out_features=1000 대신 분류하고자 하는 가지수(2)로 바꿔줘야하니까
model.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(512, 64),
            nn.ReLU(inplace=True),
            nn.Linear(64, 2),
            nn.Softmax() # 추가
        )
model

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=512, bias=True)
  

In [18]:
summary(model, (3,224,224))

Layer (type:depth-idx)                   Output Shape              Param #
├─Sequential: 1-1                        [-1, 256, 6, 6]           --
|    └─Conv2d: 2-1                       [-1, 64, 55, 55]          23,296
|    └─ReLU: 2-2                         [-1, 64, 55, 55]          --
|    └─MaxPool2d: 2-3                    [-1, 64, 27, 27]          --
|    └─Conv2d: 2-4                       [-1, 192, 27, 27]         307,392
|    └─ReLU: 2-5                         [-1, 192, 27, 27]         --
|    └─MaxPool2d: 2-6                    [-1, 192, 13, 13]         --
|    └─Conv2d: 2-7                       [-1, 384, 13, 13]         663,936
|    └─ReLU: 2-8                         [-1, 384, 13, 13]         --
|    └─Conv2d: 2-9                       [-1, 256, 13, 13]         884,992
|    └─ReLU: 2-10                        [-1, 256, 13, 13]         --
|    └─Conv2d: 2-11                      [-1, 256, 13, 13]         590,080
|    └─ReLU: 2-12                        [-1, 256, 13, 13]   

  return self._call_impl(*args, **kwargs)


Layer (type:depth-idx)                   Output Shape              Param #
├─Sequential: 1-1                        [-1, 256, 6, 6]           --
|    └─Conv2d: 2-1                       [-1, 64, 55, 55]          23,296
|    └─ReLU: 2-2                         [-1, 64, 55, 55]          --
|    └─MaxPool2d: 2-3                    [-1, 64, 27, 27]          --
|    └─Conv2d: 2-4                       [-1, 192, 27, 27]         307,392
|    └─ReLU: 2-5                         [-1, 192, 27, 27]         --
|    └─MaxPool2d: 2-6                    [-1, 192, 13, 13]         --
|    └─Conv2d: 2-7                       [-1, 384, 13, 13]         663,936
|    └─ReLU: 2-8                         [-1, 384, 13, 13]         --
|    └─Conv2d: 2-9                       [-1, 256, 13, 13]         884,992
|    └─ReLU: 2-10                        [-1, 256, 13, 13]         --
|    └─Conv2d: 2-11                      [-1, 256, 13, 13]         590,080
|    └─ReLU: 2-12                        [-1, 256, 13, 13]   

In [19]:
image = torch.rand(32,3,224,224)
model(image).shape

torch.Size([32, 2])

In [20]:
for p in model.parameters():
    print(p.names, type(p), p.shape, p.requires_grad) # p.requires_grad를 true에서 false로 바꿔서 기록못하도록 바꿔줘야 미분가능

(None, None, None, None) <class 'torch.nn.parameter.Parameter'> torch.Size([64, 3, 11, 11]) True
(None,) <class 'torch.nn.parameter.Parameter'> torch.Size([64]) True
(None, None, None, None) <class 'torch.nn.parameter.Parameter'> torch.Size([192, 64, 5, 5]) True
(None,) <class 'torch.nn.parameter.Parameter'> torch.Size([192]) True
(None, None, None, None) <class 'torch.nn.parameter.Parameter'> torch.Size([384, 192, 3, 3]) True
(None,) <class 'torch.nn.parameter.Parameter'> torch.Size([384]) True
(None, None, None, None) <class 'torch.nn.parameter.Parameter'> torch.Size([256, 384, 3, 3]) True
(None,) <class 'torch.nn.parameter.Parameter'> torch.Size([256]) True
(None, None, None, None) <class 'torch.nn.parameter.Parameter'> torch.Size([256, 256, 3, 3]) True
(None,) <class 'torch.nn.parameter.Parameter'> torch.Size([256]) True
(None, None) <class 'torch.nn.parameter.Parameter'> torch.Size([512, 9216]) True
(None,) <class 'torch.nn.parameter.Parameter'> torch.Size([512]) True
(None, None)

In [21]:
model = alexnet(weights=AlexNet_Weights.IMAGENET1K_V1)

In [22]:
for p in model.parameters():
    # print(p.names, type(p), p.shape, p.requires_grad)
    p.requires_grad = False

In [23]:
summary(model, (3,224,224))

Layer (type:depth-idx)                   Output Shape              Param #
├─Sequential: 1-1                        [-1, 256, 6, 6]           --
|    └─Conv2d: 2-1                       [-1, 64, 55, 55]          (23,296)
|    └─ReLU: 2-2                         [-1, 64, 55, 55]          --
|    └─MaxPool2d: 2-3                    [-1, 64, 27, 27]          --
|    └─Conv2d: 2-4                       [-1, 192, 27, 27]         (307,392)
|    └─ReLU: 2-5                         [-1, 192, 27, 27]         --
|    └─MaxPool2d: 2-6                    [-1, 192, 13, 13]         --
|    └─Conv2d: 2-7                       [-1, 384, 13, 13]         (663,936)
|    └─ReLU: 2-8                         [-1, 384, 13, 13]         --
|    └─Conv2d: 2-9                       [-1, 256, 13, 13]         (884,992)
|    └─ReLU: 2-10                        [-1, 256, 13, 13]         --
|    └─Conv2d: 2-11                      [-1, 256, 13, 13]         (590,080)
|    └─ReLU: 2-12                        [-1, 256, 

Layer (type:depth-idx)                   Output Shape              Param #
├─Sequential: 1-1                        [-1, 256, 6, 6]           --
|    └─Conv2d: 2-1                       [-1, 64, 55, 55]          (23,296)
|    └─ReLU: 2-2                         [-1, 64, 55, 55]          --
|    └─MaxPool2d: 2-3                    [-1, 64, 27, 27]          --
|    └─Conv2d: 2-4                       [-1, 192, 27, 27]         (307,392)
|    └─ReLU: 2-5                         [-1, 192, 27, 27]         --
|    └─MaxPool2d: 2-6                    [-1, 192, 13, 13]         --
|    └─Conv2d: 2-7                       [-1, 384, 13, 13]         (663,936)
|    └─ReLU: 2-8                         [-1, 384, 13, 13]         --
|    └─Conv2d: 2-9                       [-1, 256, 13, 13]         (884,992)
|    └─ReLU: 2-10                        [-1, 256, 13, 13]         --
|    └─Conv2d: 2-11                      [-1, 256, 13, 13]         (590,080)
|    └─ReLU: 2-12                        [-1, 256, 

In [24]:
model.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(512, 64),
            nn.ReLU(inplace=True),
            nn.Linear(64, 2),
            nn.Softmax() # 추가
        )
model

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=512, bias=True)
  

In [25]:
for p in model.parameters():
    print(p.names, type(p), p.shape, p.requires_grad)

(None, None, None, None) <class 'torch.nn.parameter.Parameter'> torch.Size([64, 3, 11, 11]) False
(None,) <class 'torch.nn.parameter.Parameter'> torch.Size([64]) False
(None, None, None, None) <class 'torch.nn.parameter.Parameter'> torch.Size([192, 64, 5, 5]) False
(None,) <class 'torch.nn.parameter.Parameter'> torch.Size([192]) False
(None, None, None, None) <class 'torch.nn.parameter.Parameter'> torch.Size([384, 192, 3, 3]) False
(None,) <class 'torch.nn.parameter.Parameter'> torch.Size([384]) False
(None, None, None, None) <class 'torch.nn.parameter.Parameter'> torch.Size([256, 384, 3, 3]) False
(None,) <class 'torch.nn.parameter.Parameter'> torch.Size([256]) False
(None, None, None, None) <class 'torch.nn.parameter.Parameter'> torch.Size([256, 256, 3, 3]) False
(None,) <class 'torch.nn.parameter.Parameter'> torch.Size([256]) False
(None, None) <class 'torch.nn.parameter.Parameter'> torch.Size([512, 9216]) True
(None,) <class 'torch.nn.parameter.Parameter'> torch.Size([512]) True
(N

In [26]:
my_transform = v2.Compose([
    v2.Resize((224,224)), # 이미지사이즈 맞춰주기
    v2.ToTensor()
])
root = "data/dogs-vs-cats"
dataset = ImageFolder(root, transform=my_transform)

In [27]:
data_loader = DataLoader(dataset, batch_size=32, shuffle=True)
for X_train, y_label in data_loader:
    print(X_train.shape, y_label.shape)
    break

torch.Size([32, 3, 224, 224]) torch.Size([32])


In [28]:
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

In [29]:
# 자주 사용하니까 fit 메서드로 만들어둠
def fit(model, data_loader, loss_fn, optimizer, epochs):
    for i in range(epochs):
        for X_train, y_label in data_loader:
            optimizer.zero_grad()
            outputs = model(X_train)
            loss = loss_fn(outputs, y_label)
            loss.backward()
            optimizer.step()
    return model

In [30]:
# fit(model, data_loader, loss_fn, optimizer, 1)

In [31]:
for X_train, y_label in data_loader:
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = loss_fn(outputs, y_label)
    loss.backward()
    optimizer.step()
    break

### VGGNet

In [32]:
# https://pytorch.org/vision/stable/_modules/torchvision/models/vgg.html#VGG16_Weights

In [33]:
model = vgg16(weights=VGG16_Weights.IMAGENET1K_V1)

In [34]:
model

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [35]:
model.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 256),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(256, 64),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(64, 2)
        )

In [36]:
summary(model, (3, 224, 224))

Layer (type:depth-idx)                   Output Shape              Param #
├─Sequential: 1-1                        [-1, 512, 7, 7]           --
|    └─Conv2d: 2-1                       [-1, 64, 224, 224]        1,792
|    └─ReLU: 2-2                         [-1, 64, 224, 224]        --
|    └─Conv2d: 2-3                       [-1, 64, 224, 224]        36,928
|    └─ReLU: 2-4                         [-1, 64, 224, 224]        --
|    └─MaxPool2d: 2-5                    [-1, 64, 112, 112]        --
|    └─Conv2d: 2-6                       [-1, 128, 112, 112]       73,856
|    └─ReLU: 2-7                         [-1, 128, 112, 112]       --
|    └─Conv2d: 2-8                       [-1, 128, 112, 112]       147,584
|    └─ReLU: 2-9                         [-1, 128, 112, 112]       --
|    └─MaxPool2d: 2-10                   [-1, 128, 56, 56]         --
|    └─Conv2d: 2-11                      [-1, 256, 56, 56]         295,168
|    └─ReLU: 2-12                        [-1, 256, 56, 56]      

Layer (type:depth-idx)                   Output Shape              Param #
├─Sequential: 1-1                        [-1, 512, 7, 7]           --
|    └─Conv2d: 2-1                       [-1, 64, 224, 224]        1,792
|    └─ReLU: 2-2                         [-1, 64, 224, 224]        --
|    └─Conv2d: 2-3                       [-1, 64, 224, 224]        36,928
|    └─ReLU: 2-4                         [-1, 64, 224, 224]        --
|    └─MaxPool2d: 2-5                    [-1, 64, 112, 112]        --
|    └─Conv2d: 2-6                       [-1, 128, 112, 112]       73,856
|    └─ReLU: 2-7                         [-1, 128, 112, 112]       --
|    └─Conv2d: 2-8                       [-1, 128, 112, 112]       147,584
|    └─ReLU: 2-9                         [-1, 128, 112, 112]       --
|    └─MaxPool2d: 2-10                   [-1, 128, 56, 56]         --
|    └─Conv2d: 2-11                      [-1, 256, 56, 56]         295,168
|    └─ReLU: 2-12                        [-1, 256, 56, 56]      

In [37]:
for p in model.parameters():
    p.requires_grad = False

for param in model.classifier.parameters():
    param.requires_grad = True

# "classifier 레이어만 학습 가능하도록 한다" 
# -> 모델의 특정 부분(여기서는 classifier 레이어)만 파라미터를 업데이트 가능하게 설정하고, 나머지 부분은 학습 과정에서 변경되지 않도록 하는 것
# 이 방법은 전이 학습(transfer learning)에서 흔히 사용
# 모델의 나머지 부분(예: feature extractor)은 고정되어 있으며, classifier 레이어만 새로운 데이터셋에 맞게 조정

In [38]:
model.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 1024),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(1024, 2)
)
model

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [39]:
summary(model, (3, 224, 224))

Layer (type:depth-idx)                   Output Shape              Param #
├─Sequential: 1-1                        [-1, 512, 7, 7]           --
|    └─Conv2d: 2-1                       [-1, 64, 224, 224]        (1,792)
|    └─ReLU: 2-2                         [-1, 64, 224, 224]        --
|    └─Conv2d: 2-3                       [-1, 64, 224, 224]        (36,928)
|    └─ReLU: 2-4                         [-1, 64, 224, 224]        --
|    └─MaxPool2d: 2-5                    [-1, 64, 112, 112]        --
|    └─Conv2d: 2-6                       [-1, 128, 112, 112]       (73,856)
|    └─ReLU: 2-7                         [-1, 128, 112, 112]       --
|    └─Conv2d: 2-8                       [-1, 128, 112, 112]       (147,584)
|    └─ReLU: 2-9                         [-1, 128, 112, 112]       --
|    └─MaxPool2d: 2-10                   [-1, 128, 56, 56]         --
|    └─Conv2d: 2-11                      [-1, 256, 56, 56]         (295,168)
|    └─ReLU: 2-12                        [-1, 256, 56,

Layer (type:depth-idx)                   Output Shape              Param #
├─Sequential: 1-1                        [-1, 512, 7, 7]           --
|    └─Conv2d: 2-1                       [-1, 64, 224, 224]        (1,792)
|    └─ReLU: 2-2                         [-1, 64, 224, 224]        --
|    └─Conv2d: 2-3                       [-1, 64, 224, 224]        (36,928)
|    └─ReLU: 2-4                         [-1, 64, 224, 224]        --
|    └─MaxPool2d: 2-5                    [-1, 64, 112, 112]        --
|    └─Conv2d: 2-6                       [-1, 128, 112, 112]       (73,856)
|    └─ReLU: 2-7                         [-1, 128, 112, 112]       --
|    └─Conv2d: 2-8                       [-1, 128, 112, 112]       (147,584)
|    └─ReLU: 2-9                         [-1, 128, 112, 112]       --
|    └─MaxPool2d: 2-10                   [-1, 128, 56, 56]         --
|    └─Conv2d: 2-11                      [-1, 256, 56, 56]         (295,168)
|    └─ReLU: 2-12                        [-1, 256, 56,

### ResNet

In [40]:
# https://pytorch.org/vision/stable/_modules/torchvision/models/resnet.html#ResNet50_Weights

In [41]:
model = resnet50(weights=ResNet50_Weights.IMAGENET1K_V2)

In [42]:
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [43]:
for p in model.parameters():
    p.requires_grad = False

In [44]:
model.fc = nn.Sequential(
    nn.Linear(2048,2),
    nn.Softmax(dim=1)
)

In [45]:
summary(model, (3, 224, 224))

Layer (type:depth-idx)                   Output Shape              Param #
├─Conv2d: 1-1                            [-1, 64, 112, 112]        (9,408)
├─BatchNorm2d: 1-2                       [-1, 64, 112, 112]        (128)
├─ReLU: 1-3                              [-1, 64, 112, 112]        --
├─MaxPool2d: 1-4                         [-1, 64, 56, 56]          --
├─Sequential: 1-5                        [-1, 256, 56, 56]         --
|    └─Bottleneck: 2-1                   [-1, 256, 56, 56]         --
|    |    └─Conv2d: 3-1                  [-1, 64, 56, 56]          (4,096)
|    |    └─BatchNorm2d: 3-2             [-1, 64, 56, 56]          (128)
|    |    └─ReLU: 3-3                    [-1, 64, 56, 56]          --
|    |    └─Conv2d: 3-4                  [-1, 64, 56, 56]          (36,864)
|    |    └─BatchNorm2d: 3-5             [-1, 64, 56, 56]          (128)
|    |    └─ReLU: 3-6                    [-1, 64, 56, 56]          --
|    |    └─Conv2d: 3-7                  [-1, 256, 56, 56]  

Layer (type:depth-idx)                   Output Shape              Param #
├─Conv2d: 1-1                            [-1, 64, 112, 112]        (9,408)
├─BatchNorm2d: 1-2                       [-1, 64, 112, 112]        (128)
├─ReLU: 1-3                              [-1, 64, 112, 112]        --
├─MaxPool2d: 1-4                         [-1, 64, 56, 56]          --
├─Sequential: 1-5                        [-1, 256, 56, 56]         --
|    └─Bottleneck: 2-1                   [-1, 256, 56, 56]         --
|    |    └─Conv2d: 3-1                  [-1, 64, 56, 56]          (4,096)
|    |    └─BatchNorm2d: 3-2             [-1, 64, 56, 56]          (128)
|    |    └─ReLU: 3-3                    [-1, 64, 56, 56]          --
|    |    └─Conv2d: 3-4                  [-1, 64, 56, 56]          (36,864)
|    |    └─BatchNorm2d: 3-5             [-1, 64, 56, 56]          (128)
|    |    └─ReLU: 3-6                    [-1, 64, 56, 56]          --
|    |    └─Conv2d: 3-7                  [-1, 256, 56, 56]  

In [46]:
list_models()

['alexnet',
 'convnext_base',
 'convnext_large',
 'convnext_small',
 'convnext_tiny',
 'deeplabv3_mobilenet_v3_large',
 'deeplabv3_resnet101',
 'deeplabv3_resnet50',
 'densenet121',
 'densenet161',
 'densenet169',
 'densenet201',
 'efficientnet_b0',
 'efficientnet_b1',
 'efficientnet_b2',
 'efficientnet_b3',
 'efficientnet_b4',
 'efficientnet_b5',
 'efficientnet_b6',
 'efficientnet_b7',
 'efficientnet_v2_l',
 'efficientnet_v2_m',
 'efficientnet_v2_s',
 'fasterrcnn_mobilenet_v3_large_320_fpn',
 'fasterrcnn_mobilenet_v3_large_fpn',
 'fasterrcnn_resnet50_fpn',
 'fasterrcnn_resnet50_fpn_v2',
 'fcn_resnet101',
 'fcn_resnet50',
 'fcos_resnet50_fpn',
 'googlenet',
 'inception_v3',
 'keypointrcnn_resnet50_fpn',
 'lraspp_mobilenet_v3_large',
 'maskrcnn_resnet50_fpn',
 'maskrcnn_resnet50_fpn_v2',
 'maxvit_t',
 'mc3_18',
 'mnasnet0_5',
 'mnasnet0_75',
 'mnasnet1_0',
 'mnasnet1_3',
 'mobilenet_v2',
 'mobilenet_v3_large',
 'mobilenet_v3_small',
 'mvit_v1_b',
 'mvit_v2_s',
 'quantized_googlenet',
 '

### train_model

In [47]:
def train_model(model,dataloaders,criterion,optimizer,device,num_epochs=13,is_train=True):
    since = time.time()
    acc_history = []
    loss_hisotry = []
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs-1))
        print('-'*10)

        running_loss = 0.0
        running_corrects = 0

        for inputs,labels in dataloaders:
            inputs = inputs.to(device)
            labels = labels.to(device)

            model.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            _, preds = torch.max(outputs, 1)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()*inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)

        epoch_loss = running_loss/len(dataloaders.dataset)
        epoch_acc = running_corrects.double()/len(dataloaders.dataset)
    
        print('Loss: {:.4f} Acc: {:.4f}'.format(epoch_loss, epoch_acc))
    
        if epoch_acc > best_acc:
            best_acc = epoch_acc
    
        acc_history.append(epoch_acc.item())
        loss_history.append(epoch_loss)
        torch.save(model.state_dict(), os.path.join('data/catanddog/', '{0:0=2d}.pth'.format(epoch)))
        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed//60, time_elapsed%60))
    print('Best_Acc: {:4f}'.format(best_acc))
    return acc_history, loss_history

In [53]:
model = resnet50(weights=ResNet50_Weights.IMAGENET1K_V2)
params_to_update = []
for name,param in model.named_parameters():
    if param.requires_grad == True:
        params_to_update.append(param) # 파라미터 학습결과를 저장
        print("\t", name)

optimizer = optim.Adam(params_to_update)

	 conv1.weight
	 bn1.weight
	 bn1.bias
	 layer1.0.conv1.weight
	 layer1.0.bn1.weight
	 layer1.0.bn1.bias
	 layer1.0.conv2.weight
	 layer1.0.bn2.weight
	 layer1.0.bn2.bias
	 layer1.0.conv3.weight
	 layer1.0.bn3.weight
	 layer1.0.bn3.bias
	 layer1.0.downsample.0.weight
	 layer1.0.downsample.1.weight
	 layer1.0.downsample.1.bias
	 layer1.1.conv1.weight
	 layer1.1.bn1.weight
	 layer1.1.bn1.bias
	 layer1.1.conv2.weight
	 layer1.1.bn2.weight
	 layer1.1.bn2.bias
	 layer1.1.conv3.weight
	 layer1.1.bn3.weight
	 layer1.1.bn3.bias
	 layer1.2.conv1.weight
	 layer1.2.bn1.weight
	 layer1.2.bn1.bias
	 layer1.2.conv2.weight
	 layer1.2.bn2.weight
	 layer1.2.bn2.bias
	 layer1.2.conv3.weight
	 layer1.2.bn3.weight
	 layer1.2.bn3.bias
	 layer2.0.conv1.weight
	 layer2.0.bn1.weight
	 layer2.0.bn1.bias
	 layer2.0.conv2.weight
	 layer2.0.bn2.weight
	 layer2.0.bn2.bias
	 layer2.0.conv3.weight
	 layer2.0.bn3.weight
	 layer2.0.bn3.bias
	 layer2.0.downsample.0.weight
	 layer2.0.downsample.1.weight
	 layer2.0.downs

In [54]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
criterion = nn.CrossEntropyLoss()

data_path = 'data/catanddog/train'
my_transform = v2.Compose([
    v2.Resize(size=(224,224)),
    v2.ToTensor()
])
train_dataset = ImageFolder(data_path, transform=my_transform)
train_loader = DataLoader(train_dataset, batch_size=32)
train_acc_hist, train_loss_hist = train_model(resnet50, train_loader, criterion, optimizer, device)

Epoch 0/12
----------


AttributeError: 'function' object has no attribute 'to'

In [55]:
"cuda" if torch.cuda.is_available() else "cpu"

'cpu'

In [56]:
torch.cuda.is_available()

False