In [1]:

import torch
import torch.nn as nn # 신경망 모듈을 정의하는 데 사용
import torchvision.datasets as dsets # 이미지 데이터셋(MNIST, CIFAR-10 등)을 불러오기 위한 모듈
import torchvision.transforms as transforms  # 이미지 데이터 전처리 (e.g., 텐서 변환, 정규화)
from torch.utils.data import DataLoader # DataLoader: 데이터셋을 미니배치 단위로 분할해 지정한 데이터셋을 반복할 수 있도록 해주는 클래스
import torch.nn.init

In [2]:
# (1) 입력 데이터 정의: 1개의 흑백 이미지 (28x28)로 구성된 텐서
input=torch.Tensor(1,1,28,28) #batch size(한번에 로딩하는 이미지 수), channel, width, height
print(input.size())# 텐서 크기 출력
print(input)  # 텐서 값 출력 (초기화되지 않아 랜덤 값이 들어 있음)


torch.Size([1, 1, 28, 28])
tensor([[[[0.0000e+00, 0.0000e+00, 1.9153e+22, 1.4013e-45, 0.0000e+00,
           0.0000e+00, 4.1142e-37, 1.4013e-45, 5.0048e-37, 1.4013e-45,
           4.0582e-37, 1.4013e-45, 5.0274e-37, 1.4013e-45, 4.1447e-37,
           1.4013e-45, 5.2812e-37, 1.4013e-45, 2.0542e-35, 1.4013e-45,
           3.2706e-30, 1.4013e-45, 3.2705e-30, 1.4013e-45, 3.2309e-37,
           1.4013e-45, 9.6258e+13, 1.4013e-45],
          [9.8718e+13, 1.4013e-45, 9.7616e+13, 1.4013e-45, 3.2309e-37,
           1.4013e-45, 9.7586e+13, 1.4013e-45, 9.6571e+13, 1.4013e-45,
           9.7165e+13, 1.4013e-45, 9.7617e+13, 1.4013e-45, 9.7617e+13,
           1.4013e-45, 9.7618e+13, 1.4013e-45, 1.6999e+22, 1.4013e-45,
           1.6994e+22, 1.4013e-45, 1.6998e+22, 1.4013e-45, 1.6910e+22,
           1.4013e-45, 1.6998e+22, 1.4013e-45],
          [3.2309e-37, 1.4013e-45, 3.8653e-30, 1.4013e-45, 1.9539e+22,
           1.4013e-45, 1.9855e+22, 1.4013e-45, 1.9760e+22, 1.4013e-45,
           1.6998e+22, 1.

In [3]:
# (2) 합성곱 층(Conv2D) 정의
# Conv2d(입력 채널 수, 출력 채널 수, 커널 크기, 스트라이드=1, 패딩)
conv1 = nn.Conv2d(1, 32, 3, padding=1) #stride 생략하면 1 # 흑백 이미지(채널=1)에 32개의 필터(커널 크기=3x3) 적용, 패딩으로 크기 유지
#Conv2d(input channel number, output channel number, kernel size, stride=1, padding=1)
print(conv1) # 합성곱 층의 구조 출력

Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))


In [4]:
# (1) 두 번째 합성곱 층 정의
conv2 = nn.Conv2d(32, 64, 3, padding=1)  # 입력 채널: 32, 출력 채널: 64, 커널 크기: 3x3, 패딩: 1
print(conv2)  # conv2 층 구조 출력


Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))


In [6]:
# (2) 풀링 층 정의
pool = nn.MaxPool2d(2)  # 맥스 풀링: 커널 크기 2x2, 스트라이드=커널 크기 (2)
print(pool)  # pool 층 구조 출력

MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)


In [7]:
# (3) 입력 데이터에 첫 번째 합성곱 층 적용
out1 = conv1(input)  # 첫 번째 합성곱 연산 (conv1)
out2 = pool(out1)  # 첫 번째 풀링 연산 (pool)
print(out1.size())  # 합성곱 결과 텐서 크기 출력
print(out2.size())  # 풀링 결과 텐서 크기 출력

torch.Size([1, 32, 28, 28])
torch.Size([1, 32, 14, 14])


In [8]:
# (4) 두 번째 합성곱과 풀링 층 적용
out3 = conv2(out2)  # 두 번째 합성곱 연산 (conv2)
out4 = pool(out3)  # 두 번째 풀링 연산 (pool)
print(out3.size())  # 두 번째 합성곱 결과 텐서 크기 출력
print(out4.size())  # 두 번째 풀링 결과 텐서 크기 출력

torch.Size([1, 64, 14, 14])
torch.Size([1, 64, 7, 7])


In [9]:
# (5) Fully connected layer 준비
out = out4.view(out4.size(0), -1)  # Flatten: 4D 텐서를 2D 텐서로 펼침 (batch_size, 나머지 차원)
print(out.size())  # 펼친 결과 텐서 크기 출력 (batch size, width*height*channel)

torch.Size([1, 3136])


In [10]:
# (6) Fully connected layer 정의
fc = nn.Linear(3136, 10)  # 입력 크기: 3136, 출력 크기: 10 (10개의 클래스)
out = fc(out)  # Fully connected layer 연산 적용
print(out.size())  # 최종 출력 텐서 크기 출력 (batch size, class number)

torch.Size([1, 10])


In [11]:
# (7) CUDA 장치 설정
device = 'cuda' if torch.cuda.is_available() else 'cpu'  # CUDA 사용 가능하면 'cuda', 아니면 'cpu' 설정

# 대안적인 device 설정 방법 (주석 처리된 코드)
"""
device = ''
if torch.cuda.is_available:
    device = 'cuda'
else:
    device = 'cpu'
"""

# (8) 랜덤 시드 고정
torch.manual_seed(777)  # Random seed 고정: 동일한 결과를 재현 가능하게 설정

# (9) CUDA에서 랜덤 시드 고정
if device == 'cuda':
    torch.cuda.manual_seed_all(777)  # CUDA 환경에서도 동일한 결과 재현

In [12]:
# 하이퍼파라미터 설정
learning_rate = 0.001  # 학습률
epochs = 15  # 학습 반복 횟수
batch_size = 100  # 미니배치 크기

In [13]:
# MNIST 데이터셋 로드
mnist_train = dsets.MNIST(
    root='MNIST_data/', train=True, transform=transforms.ToTensor(), download=True
)  # 학습용 데이터셋
mnist_test = dsets.MNIST(
    root='MNIST_data/', train=False, transform=transforms.ToTensor(), download=True
)  # 테스트용 데이터셋

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to MNIST_data/MNIST/raw/train-images-idx3-ubyte.gz


100.0%


Extracting MNIST_data/MNIST/raw/train-images-idx3-ubyte.gz to MNIST_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw/train-labels-idx1-ubyte.gz


100.0%


Extracting MNIST_data/MNIST/raw/train-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to MNIST_data/MNIST/raw/t10k-images-idx3-ubyte.gz


100.0%


Extracting MNIST_data/MNIST/raw/t10k-images-idx3-ubyte.gz to MNIST_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100.0%

Extracting MNIST_data/MNIST/raw/t10k-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw






In [14]:
print(mnist_train)  # 학습 데이터셋 정보 출력
print(mnist_test)  # 테스트 데이터셋 정보 출력

Dataset MNIST
    Number of datapoints: 60000
    Root location: MNIST_data/
    Split: Train
    StandardTransform
Transform: ToTensor()
Dataset MNIST
    Number of datapoints: 10000
    Root location: MNIST_data/
    Split: Test
    StandardTransform
Transform: ToTensor()


In [15]:
# 데이터 로더 정의
train_loader=DataLoader(dataset=mnist_train, 
                       batch_size=batch_size,
                       shuffle=True,
                        drop_last=False) #shuffle: 데이터를 섞어서 불러옴. drop_last: 마지막 배치를 버릴지 여부
 # 학습 데이터 로더
test_loader=DataLoader(dataset=mnist_test,
                        batch_size=batch_size,
                        shuffle=False,
                        drop_last=False)
                        # 테스트 데이터 로더

In [16]:
for X, Y in train_loader:
    print(X.size())
    print(Y.size())

torch.Size([100, 1, 28, 28])
torch.Size([100])
torch.Size([100, 1, 28, 28])
torch.Size([100])
torch.Size([100, 1, 28, 28])
torch.Size([100])
torch.Size([100, 1, 28, 28])
torch.Size([100])
torch.Size([100, 1, 28, 28])
torch.Size([100])
torch.Size([100, 1, 28, 28])
torch.Size([100])
torch.Size([100, 1, 28, 28])
torch.Size([100])
torch.Size([100, 1, 28, 28])
torch.Size([100])
torch.Size([100, 1, 28, 28])
torch.Size([100])
torch.Size([100, 1, 28, 28])
torch.Size([100])
torch.Size([100, 1, 28, 28])
torch.Size([100])
torch.Size([100, 1, 28, 28])
torch.Size([100])
torch.Size([100, 1, 28, 28])
torch.Size([100])
torch.Size([100, 1, 28, 28])
torch.Size([100])
torch.Size([100, 1, 28, 28])
torch.Size([100])
torch.Size([100, 1, 28, 28])
torch.Size([100])
torch.Size([100, 1, 28, 28])
torch.Size([100])
torch.Size([100, 1, 28, 28])
torch.Size([100])
torch.Size([100, 1, 28, 28])
torch.Size([100])
torch.Size([100, 1, 28, 28])
torch.Size([100])
torch.Size([100, 1, 28, 28])
torch.Size([100])
torch.Size([1

In [17]:
for X, Y in test_loader:
    print(X.size())
    print(Y.size())
    break
for X, Y in test_loader:
    print(X.size())
    print(Y.size())
    break

torch.Size([100, 1, 28, 28])
torch.Size([100])
torch.Size([100, 1, 28, 28])
torch.Size([100])


In [18]:
# CNN 모델 정의
import torch
import torch.nn as nn

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        #conv layer1
        #image in shape=(100, 1, 28, 28)
        #conv -> (?, 32, 28, 28)
        #pool -> (?, 32, 14, 14)

        self.layer1=nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2) #kernel size=2, stride=2
        )

        #conv layer2
        #image in shape=(?, 32, 14, 14)
        #conv -> (?, 64, 14, 14)
        #pool -> (?, 64, 7, 7)
        self.layer2=nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )

        #fully connected layer
        #image in shape=(?, 64, 7, 7) -> (100, 3136) 1차원으로 펼침 
        self.fc=nn.Linear(64*7*7, 10, bias=True) #10개의 class
        #완전 연결층 한정으로 가중치 초기화
        nn.init.xavier_uniform_(self.fc.weight)

    def forward(self, x):
            out=self.layer1(x)
            out=self.layer2(out)
            out=out.view(out.size(0), -1) 
            out=self.fc(out)
            return out

In [19]:
model=CNN().to(device)
criterion=nn.CrossEntropyLoss().to(device) #loss function
optimizer=torch.optim.Adam(model.parameters(), lr=learning_rate) #optimizer

In [20]:
print(model)
print(list(model.parameters()))

CNN(
  (layer1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Linear(in_features=3136, out_features=10, bias=True)
)
[Parameter containing:
tensor([[[[ 0.2166, -0.1577, -0.1258],
          [ 0.0632, -0.2605,  0.0809],
          [ 0.1567,  0.3265,  0.2777]]],


        [[[-0.1169,  0.2477,  0.2583],
          [ 0.2690, -0.1864, -0.0669],
          [-0.1780,  0.2941,  0.2786]]],


        [[[ 0.2635, -0.0440, -0.2318],
          [-0.1231, -0.0819,  0.0778],
          [ 0.0237,  0.3314, -0.0815]]],


        [[[ 0.3027,  0.2188,  0.0973],
          [ 0.0363, -0.0865, -0.2869],
          [-0.0754,  0.1529,  0.2711]]],


        [[[-0.3294, 

In [21]:
train_total_batch=len(train_loader)
test_total_batch=len(test_loader)
print(train_total_batch)
print(test_total_batch)

600
100


In [22]:
for epoch in range(epochs):
    avg_cost=0

    for X, Y in train_loader:
        X=X.to(device) #device에 데이터를 로드
        Y=Y.to(device)

        optimizer.zero_grad() #optimizer 초기화
        y_hat=model(X) #forward 연산 수행 -> 예측값 계산 
        cost=criterion(y_hat, Y)
        cost.backward() 
        optimizer.step()

        avg_cost+=cost/train_total_batch #cost 계산
    print('Epoc:', epoch, 'cost:', avg_cost)

Epoc: 0 cost: tensor(0.2315, grad_fn=<AddBackward0>)
Epoc: 1 cost: tensor(0.0655, grad_fn=<AddBackward0>)
Epoc: 2 cost: tensor(0.0478, grad_fn=<AddBackward0>)
Epoc: 3 cost: tensor(0.0382, grad_fn=<AddBackward0>)
Epoc: 4 cost: tensor(0.0317, grad_fn=<AddBackward0>)
Epoc: 5 cost: tensor(0.0273, grad_fn=<AddBackward0>)
Epoc: 6 cost: tensor(0.0228, grad_fn=<AddBackward0>)
Epoc: 7 cost: tensor(0.0200, grad_fn=<AddBackward0>)
Epoc: 8 cost: tensor(0.0163, grad_fn=<AddBackward0>)
Epoc: 9 cost: tensor(0.0144, grad_fn=<AddBackward0>)
Epoc: 10 cost: tensor(0.0119, grad_fn=<AddBackward0>)
Epoc: 11 cost: tensor(0.0099, grad_fn=<AddBackward0>)
Epoc: 12 cost: tensor(0.0090, grad_fn=<AddBackward0>)
Epoc: 13 cost: tensor(0.0081, grad_fn=<AddBackward0>)
Epoc: 14 cost: tensor(0.0079, grad_fn=<AddBackward0>)


In [25]:
with torch.no_grad():
    avg_accuracy=0

    for X,Y in test_loader:
        X=X.to(device)
        Y=Y.to(device)
        pred=model(X)
        correct_pred=torch.argmax(pred, -1)==Y
        accuracy=correct_pred.float().sum()
        avg_accuracy+=accuracy
    avg_accuracy=avg_accuracy/test_total_batch
print('Accuracy:', avg_accuracy)

Accuracy: tensor(99.0300)
