In [12]:
import torch
from torch import nn
from torch import optim
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader
from tqdm.notebook import tqdm
import numpy as np
from itertools import chain
import matplotlib.pyplot as plt

# 1. MNIST 데이터셋의 전처리를 위한 변환 정의
transform = transforms.Compose([
    transforms.ToTensor(),  # 이미지를 PyTorch 텐서로 변환
    transforms.Normalize((0.5,), (0.5,))  # 데이터를 [-1, 1] 범위로 정규화
])

# 2. MNIST 데이터셋 다운로드 및 탑재
train_dataset = datasets.MNIST(root='MNIST_dataset', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root='MNIST_dataset', train=False, transform=transform, download=True)

# 3. DataLoader를 사용하여 배치로 데이터셋 불러오기
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=10000, shuffle=False)

# 4. 데이터 확인 (첫 번째 배치에서 이미지와 레이블 가져오기)
images, labels = next(iter(train_loader))
print(f"이미지 배치 크기: {images.size()}")
print(f"레이블 배치 크기: {labels.size()}")

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to MNIST_dataset\MNIST\raw\train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:02<00:00, 4560111.80it/s]


Extracting MNIST_dataset\MNIST\raw\train-images-idx3-ubyte.gz to MNIST_dataset\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to MNIST_dataset\MNIST\raw\train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 152815.20it/s]


Extracting MNIST_dataset\MNIST\raw\train-labels-idx1-ubyte.gz to MNIST_dataset\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to MNIST_dataset\MNIST\raw\t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:01<00:00, 1356681.74it/s]


Extracting MNIST_dataset\MNIST\raw\t10k-images-idx3-ubyte.gz to MNIST_dataset\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to MNIST_dataset\MNIST\raw\t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<?, ?it/s]

Extracting MNIST_dataset\MNIST\raw\t10k-labels-idx1-ubyte.gz to MNIST_dataset\MNIST\raw

이미지 배치 크기: torch.Size([32, 1, 28, 28])
레이블 배치 크기: torch.Size([32])





In [13]:
model = nn.Sequential(
    nn.Linear(in_features=28*28, out_features=256),
    nn.Linear(256, 128),
    nn.Linear(128, 64),
    nn.Linear(64, 10)
)

In [14]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=0.001)

In [16]:
epochs = 35

for epoch in tqdm(range(epochs), desc="Training: "):
    total_loss = 0  # 매 epoch마다 초기화
    total_acc = 0   # 매 epoch마다 초기화
    for X, y in tqdm(train_loader, desc='Batch Processing: ', leave=False):
        optimizer.zero_grad()

        # 입력 데이터 차원 변환
        X = X.view(-1, 28*28)

        # 모델의 예측값 계산
        hypothesis = model(X)

        # 손실 계산
        loss = criterion(hypothesis, y)
        total_loss += loss.item() / len(train_loader)  # 손실 값을 누적, 평균 계산

        # 정확도 계산 (각 배치에서 argmax를 사용하여 예측)
        preds = torch.argmax(hypothesis, dim=1)
        acc = (preds == y).float().mean()  # 정확도 계산
        total_acc += acc.item() / len(train_loader)  # 정확도를 누적, 평균 계산

        # 역전파 및 최적화
        loss.backward()
        optimizer.step()

    print(f"Epoch [{epoch+1}/{epochs}] | Training Loss: {total_loss:.4f} | Training Accuracy: {total_acc:.4f}")

Training:   0%|          | 0/35 [00:00<?, ?it/s]

Batch Processing:   0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [1/35] | Training Loss: 0.3419 | Training Accuracy: 0.9017


Batch Processing:   0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [2/35] | Training Loss: 0.3363 | Training Accuracy: 0.9032


Batch Processing:   0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [3/35] | Training Loss: 0.3268 | Training Accuracy: 0.9056


Batch Processing:   0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [4/35] | Training Loss: 0.3210 | Training Accuracy: 0.9075


Batch Processing:   0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [5/35] | Training Loss: 0.3170 | Training Accuracy: 0.9085


Batch Processing:   0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [6/35] | Training Loss: 0.3127 | Training Accuracy: 0.9110


Batch Processing:   0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [7/35] | Training Loss: 0.3089 | Training Accuracy: 0.9126


Batch Processing:   0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [8/35] | Training Loss: 0.3077 | Training Accuracy: 0.9126


Batch Processing:   0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [9/35] | Training Loss: 0.3060 | Training Accuracy: 0.9128


Batch Processing:   0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [10/35] | Training Loss: 0.3040 | Training Accuracy: 0.9128


Batch Processing:   0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [11/35] | Training Loss: 0.3022 | Training Accuracy: 0.9129


Batch Processing:   0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [12/35] | Training Loss: 0.2986 | Training Accuracy: 0.9146


Batch Processing:   0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [13/35] | Training Loss: 0.2979 | Training Accuracy: 0.9156


Batch Processing:   0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [14/35] | Training Loss: 0.2979 | Training Accuracy: 0.9146


Batch Processing:   0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [15/35] | Training Loss: 0.2967 | Training Accuracy: 0.9155


Batch Processing:   0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [16/35] | Training Loss: 0.2943 | Training Accuracy: 0.9160


Batch Processing:   0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [17/35] | Training Loss: 0.2941 | Training Accuracy: 0.9162


Batch Processing:   0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [18/35] | Training Loss: 0.2930 | Training Accuracy: 0.9171


Batch Processing:   0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [19/35] | Training Loss: 0.2935 | Training Accuracy: 0.9164


Batch Processing:   0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [20/35] | Training Loss: 0.2923 | Training Accuracy: 0.9165


Batch Processing:   0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [21/35] | Training Loss: 0.2904 | Training Accuracy: 0.9168


Batch Processing:   0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [22/35] | Training Loss: 0.2902 | Training Accuracy: 0.9171


Batch Processing:   0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [23/35] | Training Loss: 0.2888 | Training Accuracy: 0.9176


Batch Processing:   0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [24/35] | Training Loss: 0.2879 | Training Accuracy: 0.9179


Batch Processing:   0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [25/35] | Training Loss: 0.2883 | Training Accuracy: 0.9184


Batch Processing:   0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [26/35] | Training Loss: 0.2879 | Training Accuracy: 0.9179


Batch Processing:   0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [27/35] | Training Loss: 0.2870 | Training Accuracy: 0.9180


Batch Processing:   0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [28/35] | Training Loss: 0.2850 | Training Accuracy: 0.9187


Batch Processing:   0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [29/35] | Training Loss: 0.2856 | Training Accuracy: 0.9183


Batch Processing:   0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [30/35] | Training Loss: 0.2840 | Training Accuracy: 0.9191


Batch Processing:   0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [31/35] | Training Loss: 0.2854 | Training Accuracy: 0.9182


Batch Processing:   0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [32/35] | Training Loss: 0.2841 | Training Accuracy: 0.9200


Batch Processing:   0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [33/35] | Training Loss: 0.2847 | Training Accuracy: 0.9186


Batch Processing:   0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [34/35] | Training Loss: 0.2829 | Training Accuracy: 0.9194


Batch Processing:   0%|          | 0/1875 [00:00<?, ?it/s]

Epoch [35/35] | Training Loss: 0.2839 | Training Accuracy: 0.9190


In [18]:
wrong = []
tg = []
with torch.inference_mode():
    for test_x, test_y in test_loader:
        test_x = test_x.view(-1, 28*28)
        opred = model(test_x)
        pred = torch.softmax(opred, dim=0)
        print(f'opred\'s shape: {opred.shape}')
        print(f'pred\'s shape: {pred.shape}')

        for i, y in enumerate(test_y):
            if y != torch.argmax(pred, dim=1)[i] :
                wrong.append(i)
                tg.append(pred[i][y])

opred's shape: torch.Size([10000, 10])
pred's shape: torch.Size([10000, 10])


In [19]:
wr_tg = np.array(list(zip(wrong, tg)))
print(f'wnum의 값: {wr_tg.shape[0]}')

wnum의 값: 2685


In [20]:
wr_tg[0:10, :]

array([[1.00000000e+00, 1.70391013e-06],
       [4.00000000e+00, 2.33039984e-07],
       [8.00000000e+00, 1.77007038e-07],
       [1.00000000e+01, 3.74318176e-08],
       [1.10000000e+01, 7.37665857e-08],
       [1.30000000e+01, 1.07716225e-07],
       [1.90000000e+01, 3.88877908e-07],
       [2.40000000e+01, 8.95386094e-08],
       [2.60000000e+01, 1.46006187e-06],
       [2.70000000e+01, 1.40636564e-06]])

In [21]:
import pandas as pd

res = pd.DataFrame(data=wr_tg, columns=["idx", "Prob"]).sort_values(by="Prob")
res.head(10)

Unnamed: 0,idx,Prob
545,1609.0,2.606329e-14
607,1790.0,1.232638e-13
1998,6651.0,2.346965e-13
2246,7886.0,2.382501e-13
1269,3811.0,5.59933e-13
1369,4176.0,1.33006e-12
38,149.0,1.703624e-12
1510,4615.0,2.707134e-12
845,2488.0,3.326575e-12
2556,9634.0,3.419418e-12


In [22]:
for i in range(5):
    ex_num = res.iloc[i, 0].astype(int)
    print(f'예제번호: {ex_num: d}')

    label = test_y[ex_num]
    print(f'정답 레이블: {label: 1d}')

    output = torch.argmax(pred, dim=1)[i]
    print(f'예측 레이블: {output: 1d}')

    prob = res.iloc[i, 1]
    print(f'이 예제의 정답레이블에 대한 확률: {prob: .10f}')

    max_prob = pred.max(dim=1)[0][i]
    print(f'이 예제의 모델이 예측한 레이블의 확률: {max_prob: .10f}')

    print('이 예제의 이미지:')
    image = np.reshape(test_x[i], (28,28))
    plt.imshow(image, cmap='gray')
    plt.clim(0,1)
    plt.colorbar()
    plt.show()
    print('\n\n')

예제번호:  1609
정답 레이블:  2
예측 레이블:  7
이 예제의 정답레이블에 대한 확률:  0.0000000000
이 예제의 모델이 예측한 레이블의 확률:  0.0000363401
이 예제의 이미지:


: 