In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import torch.nn.init


In [None]:
device='cuda' if torch.cuda.is_available() else 'cpu'
print(device)
torch.manual_seed(777)
if device == 'cuda' :
    torch.cuda.manual_seed_all(777)

cpu


In [4]:
learing_rate = 0.001
epochs = 30
batch_size = 100

In [None]:
mnist_train = dsets.MNIST(root='mnist_data',
                          train=True,
                          transform=transforms.ToTensor(),
                          download=True
                          )
mnist_test = dsets.MNIST(root='mnist_data',
                          train=False,
                          transform=transforms.ToTensor(),
                          download=True)

In [7]:
print(mnist_train)
print(mnist_test)

Dataset MNIST
    Number of datapoints: 60000
    Root location: mnist_data
    Split: Train
    StandardTransform
Transform: ToTensor()
Dataset MNIST
    Number of datapoints: 10000
    Root location: mnist_data
    Split: Test
    StandardTransform
Transform: ToTensor()


In [8]:
train_loader = DataLoader(dataset=mnist_train,
                          batch_size=batch_size,
                          shuffle=True,
                          drop_last=True)
test_loader = DataLoader(dataset=mnist_test,
                         batch_size=batch_size,
                         shuffle=True,
                         drop_last=True)

In [9]:
for X, Y in test_loader:
    print(X.size(), Y.size())

torch.Size([100, 1, 28, 28]) torch.Size([100])
torch.Size([100, 1, 28, 28]) torch.Size([100])
torch.Size([100, 1, 28, 28]) torch.Size([100])
torch.Size([100, 1, 28, 28]) torch.Size([100])
torch.Size([100, 1, 28, 28]) torch.Size([100])
torch.Size([100, 1, 28, 28]) torch.Size([100])
torch.Size([100, 1, 28, 28]) torch.Size([100])
torch.Size([100, 1, 28, 28]) torch.Size([100])
torch.Size([100, 1, 28, 28]) torch.Size([100])
torch.Size([100, 1, 28, 28]) torch.Size([100])
torch.Size([100, 1, 28, 28]) torch.Size([100])
torch.Size([100, 1, 28, 28]) torch.Size([100])
torch.Size([100, 1, 28, 28]) torch.Size([100])
torch.Size([100, 1, 28, 28]) torch.Size([100])
torch.Size([100, 1, 28, 28]) torch.Size([100])
torch.Size([100, 1, 28, 28]) torch.Size([100])
torch.Size([100, 1, 28, 28]) torch.Size([100])
torch.Size([100, 1, 28, 28]) torch.Size([100])
torch.Size([100, 1, 28, 28]) torch.Size([100])
torch.Size([100, 1, 28, 28]) torch.Size([100])
torch.Size([100, 1, 28, 28]) torch.Size([100])
torch.Size([1

In [11]:
class CNN(nn.Module):
    def __init__(self):
        super().__init__()

        # ─────────────────────────────────────────────
        # [Layer 1]
        # 입력 이미지 크기: (배치크기, 채널=1, 높이=28, 너비=28) → MNIST 흑백 이미지
        # Conv2d: (1, 28, 28) → (32, 28, 28)  [채널 1 → 32, 크기는 padding=1로 유지]
        # MaxPool2d: (32, 28, 28) → (32, 14, 14) [2x2 풀링으로 크기 절반]
        # ─────────────────────────────────────────────
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),  # 3x3 필터, 32개, stride=1
            nn.ReLU(),                                             # 활성화 함수
            nn.MaxPool2d(kernel_size=2, stride=2)                  # 2x2 맥스풀링 (크기 절반)
        )
        # ─────────────────────────────────────────────
        # [Layer 2]
        # 입력 크기: (32, 14, 14)
        # Conv2d: (32, 14, 14) → (64, 14, 14)  [채널 32 → 64, 크기 유지]
        # MaxPool2d: (64, 14, 14) → (64, 7, 7) [2x2 풀링 → 크기 절반]
        # ─────────────────────────────────────────────
        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        # ─────────────────────────────────────────────
        # [Fully Connected Layer (FC)]
        # Conv 결과: (64, 7, 7) → flatten하면 64×7×7 = 3136 차원
        # FC 입력: 3136 → 출력: 10개 클래스 (MNIST 숫자 분류)
        # 가중치는 Xavier 방식으로 초기화
        # ─────────────────────────────────────────────
        self.fc = nn.Linear(7 * 7 * 64, 10, bias=True)
        nn.init.xavier_uniform_(self.fc.weight)  # FC 층 가중치 Xavier 초기화


    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out

In [13]:
model = CNN().to(device)
crit = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learing_rate)

In [14]:
print(model)
print(list(model.parameters()))

CNN(
  (layer1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Linear(in_features=3136, out_features=10, bias=True)
)
[Parameter containing:
tensor([[[[-0.2602,  0.0484,  0.0056],
          [ 0.1210, -0.3248, -0.1715],
          [ 0.1296, -0.0701,  0.2822]]],


        [[[ 0.2063, -0.3326, -0.2659],
          [-0.1509, -0.1819,  0.2412],
          [-0.2311,  0.2483, -0.2072]]],


        [[[ 0.2622,  0.2521, -0.1654],
          [-0.2284,  0.3138, -0.2838],
          [ 0.2257,  0.0032,  0.0550]]],


        [[[-0.1942,  0.1822,  0.0797],
          [-0.1015,  0.1407, -0.1863],
          [ 0.2450, -0.0700,  0.0707]]],


        [[[ 0.2520, 

In [16]:
train_total_batch = len(train_loader)
test_total_batch = len(test_loader)
print(train_total_batch)
print(test_total_batch)

600
100


In [18]:
for epoch  in range(epochs):
    avg_cost=0

    for X, Y in train_loader:
        X=X.to(device)
        Y=Y.to(device)

        optimizer.zero_grad()
        y_hat=model(X)
        cost = crit(y_hat, Y)
        cost.backward()
        optimizer.step()

        avg_cost += cost/test_total_batch
    print(f'epoch:{epoch}, cost:{avg_cost}')

epoch:0, cost:0.7299035787582397
epoch:1, cost:0.36254560947418213
epoch:2, cost:0.2660159766674042
epoch:3, cost:0.21473556756973267
epoch:4, cost:0.17717625200748444
epoch:5, cost:0.14501798152923584
epoch:6, cost:0.12157994508743286
epoch:7, cost:0.1050180196762085
epoch:8, cost:0.0912265032529831
epoch:9, cost:0.07215183973312378
epoch:10, cost:0.06477440148591995
epoch:11, cost:0.062059711664915085
epoch:12, cost:0.04746905341744423
epoch:13, cost:0.04100324586033821
epoch:14, cost:0.03030475415289402
epoch:15, cost:0.03258734568953514
epoch:16, cost:0.027742844074964523
epoch:17, cost:0.02085844799876213
epoch:18, cost:0.03712589293718338
epoch:19, cost:0.021218230947852135
epoch:20, cost:0.00812607817351818
epoch:21, cost:0.0023559723049402237
epoch:22, cost:0.03917263448238373
epoch:23, cost:0.02208077535033226
epoch:24, cost:0.011585459113121033
epoch:25, cost:0.01791551522910595
epoch:26, cost:0.009525132365524769
epoch:27, cost:0.017792372032999992
epoch:28, cost:0.014438909

In [19]:
with torch.no_grad():
    x_test = mnist_test.data.view(len(mnist_test), 1, 28, 28).float().to(device)
    y_test = mnist_test.targets.to(device)

    pred=model(x_test)
    correct_pred=torch.argmax(pred, -1) == y_test
    accuracy = correct_pred.float().mean()
    print(f'ACCURACY : ', {accuracy})

ACCURACY :  {tensor(0.9897)}
