In [1]:
# 1. dataset (MNIST, My DataSet, ImageFolder(Cat, Dog 각각 데이터가 분류되어있어야 사용가능))
# 2. DataLoader : 배치 단위로 데이터셋을 로드
# 3. model (perceptron -> LeNet5 -> ResNet)

In [38]:
import torch
import torch.nn as nn
from torchvision import datasets, transforms
import torchvision
from torch.utils.data import DataLoader 
import torch.optim as optim
from torchvision.datasets import MNIST, FashionMNIST, CIFAR10
import torchvision.transforms.v2 as v2 # 데이터 전처리 변환(transform) 기능
from torchvision.models import alexnet, AlexNet_Weights
# pip install torch-summary
from torchsummary import summary
from torchvision.datasets import ImageFolder

### LeNet5

In [3]:
class LeNet5(nn.Module):

    # --- 이건 생성자
    def __init__(self):
        super(LeNet5, self).__init__()
        self.features = nn.Sequential(nn.Conv2d(1,6,kernel_size=5,padding="same"), # 합성곱층 - 이미지 특징 추출(필터=커널 이용 이미지 스캔)
                                      nn.ReLU(), # 활성화함수 - 비선형성으로 패턴 학습할 수 있도록
                                      nn.MaxPool2d(2), # 풀링층 = 맵크기 줄임 - MaxPool은 그 구역에 가장 큰 값으로
                                      nn.Conv2d(6,16,kernel_size=5,padding="same"),
                                      nn.ReLU(),
                                      nn.MaxPool2d(2),
                                      nn.Conv2d(16,126,kernel_size=5,padding="same"),
                                      nn.ReLU(),
                                      nn.MaxPool2d(2))
        
        self.flatten = nn.Flatten() # 3차원을 1차원으로 바꿈
        
        self.classifier = nn.Sequential(nn.Linear(126*3*3,128), # 완전연결층 - 추출된 특징들을 조합하고 변환
                                        nn.ReLU(), # 활성화함수
                                        nn.Linear(128,64),
                                        nn.ReLU(),
                                        nn.Linear(64,10), # 10: 분류하고자 하는 가지수 
                                        nn.Softmax(dim=1)) # 확률 분포로 변환
        # 1134개->128개->64개->10개->확률 분포

    # --- 이건 메서드인데, 입력 데이터가 모델을 통과하는 과정을 정의 (입력~출력)
    def forward(self,x):
        x = self.features(x)      # 특징 추출 부분 (합성곱 + 풀링)
        x = self.flatten(x)       # 3D -> 1D 변환
        x = self.classifier(x)    # 분류 부분 (완전연결층)
        return x

### MNIST

In [4]:
model = LeNet5()

In [5]:
images = torch.rand(64, 1, 28, 28)

In [6]:
dataset = datasets.MNIST("data", download=True, transform=torchvision.transforms.ToTensor())

In [7]:
data_loader = DataLoader(dataset, batch_size=32)

In [8]:
for X_train, y_label in data_loader:
  print(X_train.shape, y_label.shape)
  break # 첫번째 배치의 (X_train.shape: 입력 데이터 배치의 형태, y_label.shape: 레이블 배치의 형태) 확인

# torch.Size([32, 1, 28, 28]) : 배치크기는 32, 채널수는 1(흑백), 이미지크기는 28*28
# torch.Size([32]) : 32개의 레이블 (32개 이미지 각각에 대한 정답을 나타내는 숫자들)

torch.Size([32, 1, 28, 28]) torch.Size([32])


### Adam optimizer

In [9]:
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
epochs = 1

for _ in range(epochs):
  for X_train, y_label in data_loader:
    optimizer.zero_grad() # 새로운 배치에 대한 기울기 계산 전에 이전값 초기화
    outputs = model(X_train)
    loss = loss_fn(outputs, y_label) # 예측값과 실제 레이블을 비교하여 손실을 계산
    loss.backward() # 미분
    optimizer.step() # 모델의 매개변수를 업데이트

### MNIST, FashionMNIST, CIFAR10

In [13]:
dataset = MNIST('data', download=True, transform=v2.ToTensor())
dataset = FashionMNIST('data', download=True, transform=v2.ToTensor())
dataset = CIFAR10('data', download=True, transform=v2.ToTensor())



Files already downloaded and verified


In [14]:
dataset.data.shape

(50000, 32, 32, 3)

In [15]:
data_loader = DataLoader(dataset, batch_size=64)

In [16]:
50000/64

781.25

### AlexNet

In [None]:
# https://pytorch.org/vision/main/_modules/torchvision/models/alexnet.html#alexnet

In [19]:
model = alexnet(weights=AlexNet_Weights.IMAGENET1K_V1)

Downloading: "https://download.pytorch.org/models/alexnet-owt-7be5be79.pth" to C:\Users\user/.cache\torch\hub\checkpoints\alexnet-owt-7be5be79.pth
100.0%


In [21]:
model # (avgpool): AdaptiveAvgPool2d(output_size=(6, 6)) : 평균 풀링을 수행 -> 입력을 (6, 6) 크기로 조정

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [25]:
# classifier에서 out_features=1000 대신 분류하고자 하는 가지수(2)로 바꿔줘야하니까
model.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(512, 64),
            nn.ReLU(inplace=True),
            nn.Linear(64, 2),
            nn.Softmax() # 추가
        )
model

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=512, bias=True)
  

In [28]:
summary(model, (3,224,224))

Layer (type:depth-idx)                   Output Shape              Param #
├─Sequential: 1-1                        [-1, 256, 6, 6]           --
|    └─Conv2d: 2-1                       [-1, 64, 55, 55]          23,296
|    └─ReLU: 2-2                         [-1, 64, 55, 55]          --
|    └─MaxPool2d: 2-3                    [-1, 64, 27, 27]          --
|    └─Conv2d: 2-4                       [-1, 192, 27, 27]         307,392
|    └─ReLU: 2-5                         [-1, 192, 27, 27]         --
|    └─MaxPool2d: 2-6                    [-1, 192, 13, 13]         --
|    └─Conv2d: 2-7                       [-1, 384, 13, 13]         663,936
|    └─ReLU: 2-8                         [-1, 384, 13, 13]         --
|    └─Conv2d: 2-9                       [-1, 256, 13, 13]         884,992
|    └─ReLU: 2-10                        [-1, 256, 13, 13]         --
|    └─Conv2d: 2-11                      [-1, 256, 13, 13]         590,080
|    └─ReLU: 2-12                        [-1, 256, 13, 13]   

  return self._call_impl(*args, **kwargs)


Layer (type:depth-idx)                   Output Shape              Param #
├─Sequential: 1-1                        [-1, 256, 6, 6]           --
|    └─Conv2d: 2-1                       [-1, 64, 55, 55]          23,296
|    └─ReLU: 2-2                         [-1, 64, 55, 55]          --
|    └─MaxPool2d: 2-3                    [-1, 64, 27, 27]          --
|    └─Conv2d: 2-4                       [-1, 192, 27, 27]         307,392
|    └─ReLU: 2-5                         [-1, 192, 27, 27]         --
|    └─MaxPool2d: 2-6                    [-1, 192, 13, 13]         --
|    └─Conv2d: 2-7                       [-1, 384, 13, 13]         663,936
|    └─ReLU: 2-8                         [-1, 384, 13, 13]         --
|    └─Conv2d: 2-9                       [-1, 256, 13, 13]         884,992
|    └─ReLU: 2-10                        [-1, 256, 13, 13]         --
|    └─Conv2d: 2-11                      [-1, 256, 13, 13]         590,080
|    └─ReLU: 2-12                        [-1, 256, 13, 13]   

In [30]:
image = torch.rand(32,3,224,224)
model(image).shape

torch.Size([32, 2])

In [31]:
for p in model.parameters():
    print(p.names, type(p), p.shape, p.requires_grad) # p.requires_grad를 true에서 false로 바꿔서 기록못하도록 바꿔줘야 미분가능

(None, None, None, None) <class 'torch.nn.parameter.Parameter'> torch.Size([64, 3, 11, 11]) True
(None,) <class 'torch.nn.parameter.Parameter'> torch.Size([64]) True
(None, None, None, None) <class 'torch.nn.parameter.Parameter'> torch.Size([192, 64, 5, 5]) True
(None,) <class 'torch.nn.parameter.Parameter'> torch.Size([192]) True
(None, None, None, None) <class 'torch.nn.parameter.Parameter'> torch.Size([384, 192, 3, 3]) True
(None,) <class 'torch.nn.parameter.Parameter'> torch.Size([384]) True
(None, None, None, None) <class 'torch.nn.parameter.Parameter'> torch.Size([256, 384, 3, 3]) True
(None,) <class 'torch.nn.parameter.Parameter'> torch.Size([256]) True
(None, None, None, None) <class 'torch.nn.parameter.Parameter'> torch.Size([256, 256, 3, 3]) True
(None,) <class 'torch.nn.parameter.Parameter'> torch.Size([256]) True
(None, None) <class 'torch.nn.parameter.Parameter'> torch.Size([512, 9216]) True
(None,) <class 'torch.nn.parameter.Parameter'> torch.Size([512]) True
(None, None)

In [32]:
model = alexnet(weights=AlexNet_Weights.IMAGENET1K_V1)

In [34]:
for p in model.parameters():
    # print(p.names, type(p), p.shape, p.requires_grad)
    p.requires_grad = False

In [35]:
summary(model, (3,224,224))

Layer (type:depth-idx)                   Output Shape              Param #
├─Sequential: 1-1                        [-1, 256, 6, 6]           --
|    └─Conv2d: 2-1                       [-1, 64, 55, 55]          (23,296)
|    └─ReLU: 2-2                         [-1, 64, 55, 55]          --
|    └─MaxPool2d: 2-3                    [-1, 64, 27, 27]          --
|    └─Conv2d: 2-4                       [-1, 192, 27, 27]         (307,392)
|    └─ReLU: 2-5                         [-1, 192, 27, 27]         --
|    └─MaxPool2d: 2-6                    [-1, 192, 13, 13]         --
|    └─Conv2d: 2-7                       [-1, 384, 13, 13]         (663,936)
|    └─ReLU: 2-8                         [-1, 384, 13, 13]         --
|    └─Conv2d: 2-9                       [-1, 256, 13, 13]         (884,992)
|    └─ReLU: 2-10                        [-1, 256, 13, 13]         --
|    └─Conv2d: 2-11                      [-1, 256, 13, 13]         (590,080)
|    └─ReLU: 2-12                        [-1, 256, 

Layer (type:depth-idx)                   Output Shape              Param #
├─Sequential: 1-1                        [-1, 256, 6, 6]           --
|    └─Conv2d: 2-1                       [-1, 64, 55, 55]          (23,296)
|    └─ReLU: 2-2                         [-1, 64, 55, 55]          --
|    └─MaxPool2d: 2-3                    [-1, 64, 27, 27]          --
|    └─Conv2d: 2-4                       [-1, 192, 27, 27]         (307,392)
|    └─ReLU: 2-5                         [-1, 192, 27, 27]         --
|    └─MaxPool2d: 2-6                    [-1, 192, 13, 13]         --
|    └─Conv2d: 2-7                       [-1, 384, 13, 13]         (663,936)
|    └─ReLU: 2-8                         [-1, 384, 13, 13]         --
|    └─Conv2d: 2-9                       [-1, 256, 13, 13]         (884,992)
|    └─ReLU: 2-10                        [-1, 256, 13, 13]         --
|    └─Conv2d: 2-11                      [-1, 256, 13, 13]         (590,080)
|    └─ReLU: 2-12                        [-1, 256, 

In [36]:
model.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(512, 64),
            nn.ReLU(inplace=True),
            nn.Linear(64, 2),
            nn.Softmax() # 추가
        )
model

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=512, bias=True)
  

In [37]:
for p in model.parameters():
    print(p.names, type(p), p.shape, p.requires_grad)

(None, None, None, None) <class 'torch.nn.parameter.Parameter'> torch.Size([64, 3, 11, 11]) False
(None,) <class 'torch.nn.parameter.Parameter'> torch.Size([64]) False
(None, None, None, None) <class 'torch.nn.parameter.Parameter'> torch.Size([192, 64, 5, 5]) False
(None,) <class 'torch.nn.parameter.Parameter'> torch.Size([192]) False
(None, None, None, None) <class 'torch.nn.parameter.Parameter'> torch.Size([384, 192, 3, 3]) False
(None,) <class 'torch.nn.parameter.Parameter'> torch.Size([384]) False
(None, None, None, None) <class 'torch.nn.parameter.Parameter'> torch.Size([256, 384, 3, 3]) False
(None,) <class 'torch.nn.parameter.Parameter'> torch.Size([256]) False
(None, None, None, None) <class 'torch.nn.parameter.Parameter'> torch.Size([256, 256, 3, 3]) False
(None,) <class 'torch.nn.parameter.Parameter'> torch.Size([256]) False
(None, None) <class 'torch.nn.parameter.Parameter'> torch.Size([512, 9216]) True
(None,) <class 'torch.nn.parameter.Parameter'> torch.Size([512]) True
(N

In [40]:
my_transform = v2.Compose([
    v2.Resize((224,224)), # 이미지사이즈 맞춰주기
    v2.ToTensor()
])
root = "data/dogs-vs-cats"
dataset = ImageFolder(root, transform=my_transform)

In [43]:
data_loader = DataLoader(dataset, batch_size=32, shuffle=True)
for X_train, y_label in data_loader:
    print(X_train.shape, y_label.shape)
    break

torch.Size([32, 3, 224, 224]) torch.Size([32])


In [44]:
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

In [45]:
# 자주 사용하니까 fit 메서드로 만들어둠
def fit(model, data_loader, loss_fn, optimizer, epochs):
    for i in range(epochs):
        for X_train, y_label in data_loader:
            optimizer.zero_grad()
            outputs = model(X_train)
            loss = loss_fn(outputs, y_label)
            loss.backward()
            optimizer.step()
    return model

In [None]:
# fit(model, data_loader, loss_fn, optimizer, 1)

In [47]:
for X_train, y_label in data_loader:
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = loss_fn(outputs, y_label)
    loss.backward()
    optimizer.step()
    break