# CNN 무작정 연습해보기 : MNIST

## 라이브러리 불러오기

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

from torchsummary import summary

## 데이터셋 불러오기

- 데이터셋 다운로드

In [None]:
# Training split. download=True fetches the MNIST files into ./dataset when
# they are missing; ToTensor() turns each PIL image into a float tensor.
training_dataset = datasets.MNIST(
    root='dataset', train=True, download=True, transform=ToTensor(),
)

# Test split, read from the same root directory without downloading again.
test_dataset = datasets.MNIST(
    root='dataset', train=False, download=False, transform=ToTensor(),
)

- 각 변수에 데이터셋이 담겨있는 상태

In [None]:
# Show the training dataset's repr as the cell output.
training_dataset

In [None]:
# Show the test dataset's repr as the cell output.
test_dataset

## 데이터셋 구경하기

In [None]:
# Draw a 5x5 grid of randomly chosen training images with their labels.
figure = plt.figure(figsize=(16, 16))
cols, rows = 5, 5

for i in range(1, cols * rows + 1):
    ax = figure.add_subplot(rows, cols, i)

    # Pick one random index into the training set.
    sample_idx = torch.randint(len(training_dataset), size=(1,)).item()
    img, label = training_dataset[sample_idx]

    ax.set_title(f'Actual: {label}')
    ax.axis("off")
    # squeeze() drops the channel dimension so imshow receives a 2-D image.
    ax.imshow(img.squeeze(), cmap="gray")

plt.show()

## 데이터로더 생성하기
- 데이터셋을 배치(batch) 단위로 순차적으로 조회할 수 있도록 만드는 작업

In [None]:
batch_size = 32

# DataLoader requires the dataset as its first argument. The training data is
# reshuffled every epoch so that batches differ between epochs.
train_dataloader = DataLoader(training_dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              )

# Evaluation does not depend on sample order, so the test set is not shuffled.
test_dataloader = DataLoader(test_dataset,
                             batch_size=batch_size,
                             shuffle=False,
                             )

In [None]:
# Report shape and dtype of the first batch only, then stop iterating.
for x, y in train_dataloader:
    print(
        f'학습 데이터의 형태 [N, C, H, W] : {x.shape}',
        f'학습 데이터의 데이터 타입: {x.dtype}',
        f'정답 데이터의 형태 : {y.shape}',
        f'정답 데이터의 데이터 타입: {y.dtype}',
        sep='\n',
    )
    break

## 연산 장치 설정

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

print(f'연산 장치 : {device}')

## 모델 정의 및 생성

- 모델의 형태 구성

In [None]:
class BasicCNN(nn.Module) :
    """Small convolutional classifier for 1x28x28 digit images.

    Two conv -> ReLU -> max-pool stages feed a two-layer fully connected head.
    The output is a (N, 10) tensor of raw logits (no softmax), which is the
    form nn.CrossEntropyLoss expects.
    """

    def __init__(self) :
        super().__init__()
        self.basicCNN = nn.Sequential(
            # Shape comments assume a (N, 1, 28, 28) input.
            nn.Conv2d(1, 32, kernel_size=3, padding=1),   # -> (N, 32, 28, 28)
            nn.ReLU(),
            nn.MaxPool2d(2),                              # -> (N, 32, 14, 14)
            nn.Conv2d(32, 64, kernel_size=3, padding=1),  # -> (N, 64, 14, 14)
            nn.ReLU(),
            nn.MaxPool2d(2),                              # -> (N, 64, 7, 7)
            nn.Flatten(),                                 # -> (N, 64*7*7)
            nn.Linear(64 * 7 * 7, 128),
            nn.ReLU(),
            nn.Linear(128, 10),                           # one logit per digit
        )

    def forward(self, x) :
        """Return class logits of shape (N, 10) for a batch of images ``x``."""
        x = self.basicCNN(x)
        return x

In [None]:
# Instantiate the network and move it to the selected device.
model = BasicCNN().to(device)
# Display the module structure as the cell output.
model

In [None]:
# Print a per-layer summary of the model for a single (1, 28, 28) input.
summary(model, (1, 28, 28))

- 손실 함수와 옵티마이저 설정

In [None]:
# Multi-class classification on raw logits: CrossEntropyLoss applies
# log-softmax internally and takes integer class indices as targets.
loss_fn = nn.CrossEntropyLoss()
# Adam over all model parameters; lr=1e-3 is Adam's default learning rate.
optim = torch.optim.Adam(model.parameters(), lr=1e-3)

In [None]:
# Show the configured loss function as the cell output.
loss_fn

In [None]:
# Show the configured optimizer (and its hyperparameters) as the cell output.
optim

- 학습 절차를 함수화

In [None]:
def train(dataloader, model, loss_fn, optim) :
    """Run one optimisation pass over every batch in ``dataloader``.

    Puts ``model`` in training mode, then for each batch computes the loss,
    backpropagates and takes one optimizer step. Batches are moved to the
    module-level ``device``. Progress is printed on every 100th batch.

    Args:
        dataloader: iterable of ``(inputs, targets)`` batches; must expose
            ``.dataset`` so the total sample count can be reported.
        model: the network being trained.
        loss_fn: callable taking ``(predictions, targets)`` and returning a loss.
        optim: optimizer that updates ``model``'s parameters.
    """
    model.train()
    size = len(dataloader.dataset)

    for step, (inputs, targets) in enumerate(dataloader):
        inputs = inputs.to(device)
        targets = targets.to(device)

        loss = loss_fn(model(inputs), targets)

        # Clear gradients left over from the previous step before backprop.
        optim.zero_grad()
        loss.backward()
        optim.step()

        if step % 100 != 0:
            continue

        # Estimate of samples seen so far, based on this batch's size.
        seen = (step + 1) * len(inputs)
        print(f'[{seen:5d}/{size:5d}] | loss: {loss.item():.4f}')

In [None]:
def test(dataloader, model, loss_fn) :
    """Evaluate ``model`` on ``dataloader`` and print accuracy and mean loss.

    Puts ``model`` in eval mode and runs every batch under ``torch.no_grad()``.
    The loss is averaged over the number of batches; accuracy is the fraction
    of correct argmax predictions over ``len(dataloader.dataset)``. Batches are
    moved to the module-level ``device``.

    Args:
        dataloader: iterable of ``(inputs, targets)`` batches exposing ``.dataset``.
        model: the network to evaluate.
        loss_fn: callable taking ``(predictions, targets)`` and returning a loss.
    """
    model.eval()
    size = len(dataloader.dataset)
    num_batches = len(dataloader)

    total_loss = 0
    hits = 0

    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs = inputs.to(device)
            targets = targets.to(device)

            logits = model(inputs)
            total_loss += loss_fn(logits, targets).item()
            # argmax over dim 1 picks the predicted class for each sample.
            matches = logits.argmax(1) == targets
            hits += matches.type(torch.float).sum().item()

    avg_loss = total_loss / num_batches
    accuracy = hits / size

    print(f'Accuracy: {100*accuracy:.2f}% | Avg_loss: {avg_loss:.4f}')

In [None]:
# Train for a fixed number of epochs, evaluating on the test set after each.
epochs = 3

for e in range(epochs):
    print(f'Epoch {e+1}', '--------------------------', sep='\n')

    train(train_dataloader, model, loss_fn, optim)
    test(test_dataloader, model, loss_fn)
    print('--------------------------')

print('==============================', 'End', sep='\n')

## 테스트 결과 살펴보기

In [None]:
model.eval()

# Draw the index from the real dataset size instead of a hard-coded 10000.
rand_idx = torch.randint(len(test_dataset), size=(1,)).item()
# Index the dataset once so the transform runs a single time for this sample.
x, y = test_dataset[rand_idx]
# Add the leading batch dimension: (1, 28, 28) -> (1, 1, 28, 28).
x = x.view((-1, 1, 28, 28))

with torch.no_grad() :
    x = x.to(device)
    y_pred = model(x)

    # .item() yields a plain int so the message does not print a tensor repr.
    predicted = y_pred.argmax().item()
    actual = y

    print(f'Predicted: {predicted} | Actual: {actual}')

- 시각화 하여 살펴보기

In [None]:
model.eval()

# 5x5 grid of random test images titled with predicted vs. actual label.
figure = plt.figure(figsize=(16, 16))
cols, rows = 5, 5

for i in range(1, cols*rows+1) :
    figure.add_subplot(rows, cols, i)

    # Sample from the real dataset size instead of a hard-coded 10000.
    rand_idx = torch.randint(len(test_dataset), size=(1,)).item()
    # x stays on the CPU; only the batched copy fed to the model is moved.
    x, y = test_dataset[rand_idx]

    with torch.no_grad() :
        y_pred = model(x.view((-1, 1, 28, 28)).to(device))

        predicted = y_pred.argmax().item()
        actual = y

    plt.title(f'Predicted: {predicted} | Actual: {actual}')
    plt.axis("off")
    # matplotlib cannot convert CUDA tensors, so plot the CPU tensor.
    plt.imshow(x.squeeze(), cmap="gray")

plt.show()

- 틀린 것만 시각화 하여 살펴보기

In [None]:
model.eval()

# Predicted and true label for every test sample, in dataset order.
predicted_list, actual_list = [], []

# A single no_grad context covers the whole pass.
with torch.no_grad() :
    for i in range(len(test_dataset)) :
        # Index once per sample so the transform is not applied twice.
        x, y = test_dataset[i]
        x = x.view((-1, 1, 28, 28)).to(device)

        y_pred = model(x)

        predicted_list.append(y_pred.argmax().item())
        actual_list.append(y)

In [None]:
# Peek at the first five predictions next to the first five true labels.
predicted_list[:5], actual_list[:5]

In [None]:
# Indices of the test samples whose prediction differs from the true label.
false_idx_list = [
    idx
    for idx, (guess, truth) in enumerate(zip(predicted_list, actual_list))
    if guess != truth
]

false_idx_list[:5]

In [None]:
model.eval()

# 5x5 grid of randomly chosen misclassified test images.
# Assumes false_idx_list is non-empty; torch.randint raises on an empty range.
figure = plt.figure(figsize=(16, 16))
cols, rows = 5, 5

for i in range(1, cols*rows+1) :
    figure.add_subplot(rows, cols, i)

    sample_idx = false_idx_list[ torch.randint(len(false_idx_list), size=(1,)).item() ]
    # Index once; temp_x stays on the CPU so it can be plotted below.
    temp_x, temp_actual = test_dataset[ sample_idx ]

    with torch.no_grad() :
        # The input must be on the same device as the model.
        temp_y_pred = model(temp_x.view((-1, 1, 28, 28)).to(device))

        temp_predicted = temp_y_pred.argmax().item()

    plt.title(f'Predicted: {temp_predicted} | Actual: {temp_actual}')
    plt.axis("off")
    plt.imshow(temp_x.squeeze(), cmap="gray")

plt.show()

## **내가 쓴 숫자도 인식할까?**
---
## **구글 드라이브에 손글씨 이미지를 업로드!**
### **순서**
1. 그림판으로 숫자를 그려서 저장한다. (저장할 때, 숫자를 파일명 맨 앞에 명시하는 것을 권장! ex) 3.png, 3_1.png )
2. 구글 드라이브 첫 화면에 my_data 라는 폴더를 만든다.
3. my_data 폴더 안에 my_mnist 폴더를 만든다.
4. my_mnist 폴더 안에 1번 과정에서 만든 이미지를 업로드한다.
5. 30초 정도 기다립시다.
6. 아래의 코드들을 실행해본다.

In [None]:
# Mount Google Drive at /content/drive (only available inside Google Colab).
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import glob

In [None]:
# glob returns matches in arbitrary order; sorting keeps the image order (and
# therefore the label order derived from it) reproducible between runs.
data_path = sorted(glob.glob('/content/drive/MyDrive/my_data/my_mnist/*'))
data_path

In [None]:
import os

# The label is the first character of the file name (e.g. "3_1.png" -> 3).
# Reading it from the basename instead of a fixed string offset keeps this
# correct if the folder path changes.
mydata_y = [int(os.path.basename(image_file)[0]) for image_file in data_path]

In [None]:
# Convert the label list to an int64 (torch.long) tensor.
mydata_y_ts = torch.tensor(mydata_y, dtype=torch.long)

In [None]:
from PIL import Image
from torchvision.transforms import PILToTensor

In [None]:
mydata_x = []

for image_file in data_path :
    # The context manager closes the file once the pixels have been read.
    with Image.open(image_file) as img :
        temp = img.resize((28, 28))
        temp = temp.convert('L')        # single-channel grayscale
        temp = PILToTensor()(temp)      # uint8 tensor, values 0-255
    # The MNIST data is loaded with ToTensor(), which scales pixels to [0, 1];
    # apply the same scaling here so the model sees inputs in the same range.
    temp = temp.type(torch.float32) / 255.0
    # NOTE(review): MNIST digits are light strokes on a dark background. Images
    # drawn dark-on-white may need inverting (1 - temp) — confirm for your files.
    mydata_x.append(temp)

In [None]:
# Stack the per-image tensors along a new leading dimension.
mydata_x_ts = torch.stack(mydata_x)
# Display the resulting shape as the cell output.
mydata_x_ts.shape

In [None]:
from torch.utils.data import TensorDataset

In [None]:
# Pair each image tensor with its label so samples index as (x, y).
mydataset = TensorDataset(mydata_x_ts, mydata_y_ts)

In [None]:
# Print only the first (image, label) pair, then stop iterating.
for x, y in mydataset:
    print(x, y, sep='\n')
    break

In [None]:
# Shape of the first image tensor in the custom dataset.
mydataset[0][0].shape

In [None]:
# Label of the first sample as a plain Python number.
mydataset[0][1].item()

In [None]:
# for i in range(30) :
#     print(mydataset[i][0].dtype)
#     print(mydataset[i][1].dtype)

In [None]:
model.eval()

with torch.no_grad() :
    # Cover every uploaded image; a fixed count of 40 would raise IndexError
    # with fewer files and silently skip any beyond the 40th.
    for i in range(len(mydataset)) :
        temp_x, temp_y = mydataset[i]
        # Add the batch dimension and move to the model's device.
        temp_x = temp_x.view((-1, 1, 28, 28)).to(device)

        y_pred = model(temp_x)

        # .item() gives plain ints, so the output is "3" rather than "tensor(3)".
        predicted = y_pred.argmax().item()
        actual = temp_y.item()

        print(f'Predicted: {predicted} | Actual: {actual}')

In [None]:
model.eval()

# Size the grid from the number of uploaded images instead of assuming 40.
num_samples = len(mydataset)
cols = 5
rows = max(1, -(-num_samples // cols))  # ceiling division, at least one row

figure = plt.figure(figsize=(16, 16))

for i in range(1, num_samples + 1) :
    figure.add_subplot(rows, cols, i)

    # x stays on the CPU; only the batched copy fed to the model is moved.
    x, y = mydataset[i-1]

    with torch.no_grad() :
        y_pred = model(x.view((-1, 1, 28, 28)).to(device))

        predicted = y_pred.argmax().item()
        actual = y.item()

    plt.title(f'Predicted: {predicted} | Actual: {actual}')
    plt.axis("off")
    # matplotlib cannot convert CUDA tensors, so plot the CPU tensor.
    plt.imshow(x.squeeze(), cmap="gray")

plt.show()