<a href="https://colab.research.google.com/github/TAKE-JP-17/Pytorch/blob/main/CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.utils import make_grid

import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
%matplotlib inline

In [10]:
# ToTensor converts each MNIST image into a float tensor. A single sample comes out
# 3-D as (channels, height, width) = (1, 28, 28); the 4th (batch) dimension is only
# added later, when samples are grouped into a batch.
transform = transforms.ToTensor() # We are transforming our images into a tensor

In [11]:
# Training split of MNIST
train_data = datasets.MNIST(
    root='/cnn_data',     # root directory where the dataset files are stored
    train=True,           # True -> training set, False -> test set
    download=True,        # fetch into `root` if not found locally; no re-download when already present
    transform=transform,  # conversion applied to each image
)

In [12]:
# Test split of MNIST (same root directory and transform as the training split)
test_data = datasets.MNIST(
    root='/cnn_data',
    train=False,          # False -> test set
    download=True,
    transform=transform,
)

In [13]:
# Show the training dataset summary (number of datapoints, root location, split, transform)
train_data

Dataset MNIST
    Number of datapoints: 60000
    Root location: /cnn_data
    Split: Train
    StandardTransform
Transform: ToTensor()

In [14]:
# Show the test dataset summary (number of datapoints, root location, split, transform)
test_data

Dataset MNIST
    Number of datapoints: 10000
    Root location: /cnn_data
    Split: Test
    StandardTransform
Transform: ToTensor()

In [17]:
# Wrap both splits in DataLoaders that yield small batches of 10 images
train_loader = DataLoader(
    train_data,
    batch_size=10,
    shuffle=True,   # randomly reorder the samples before each epoch starts
)
test_loader = DataLoader(
    test_data,
    batch_size=10,
    shuffle=False,  # no shuffling for the test split
)

In [18]:
# Define our CNN model.
# Two stand-alone convolutional layers, used to describe what a convolutional
# layer is doing. This is just an example; the actual model is built out later.
conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3, stride=1)   # 1 input channel, 6 filters, 3x3 kernel, moving 1 pixel at a time
conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3, stride=1)  # takes the 6 maps from conv1, produces 16

In [23]:
# Grab 1 MNIST record/image.
# Indexing the dataset returns the first (image, label) pair directly, so there is
# no need to start a loop over train_data and break out after the first item.
X_Train, y_train = train_data[0]

In [24]:
# Inspect the pixel values of the image tensor
X_Train

tensor([[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,

In [25]:
X_Train.shape # Shape of one MNIST image tensor: (channels, height, width) in pixels

torch.Size([1, 28, 28])

In [26]:
# Add a leading batch dimension: the (1, 28, 28) image tensor becomes a 4D batch
# of shape (1, 1, 28, 28), i.e. (batch, channels, height, width).
# unsqueeze(0) derives the result from the tensor's own shape instead of
# hard-coding the 28x28 image size.
x = X_Train.unsqueeze(0)

In [27]:
# Perform our first convolution
# NOTE: x is overwritten with the result, so this cell is meant to run once on a
# fresh x; conv1 takes a 1-channel input, while its output has 6 channels.
x = F.relu(conv1(x)) # Rectified Linear Unit for our activation function

In [28]:
# Inspect the activations produced by the first convolution + ReLU
x

tensor([[[[0.3059, 0.3059, 0.3059,  ..., 0.3059, 0.3059, 0.3059],
          [0.3059, 0.3059, 0.3059,  ..., 0.3059, 0.3059, 0.3059],
          [0.3059, 0.3059, 0.3059,  ..., 0.3059, 0.3059, 0.3059],
          ...,
          [0.3059, 0.3059, 0.4306,  ..., 0.3059, 0.3059, 0.3059],
          [0.3059, 0.3059, 0.4347,  ..., 0.3059, 0.3059, 0.3059],
          [0.3059, 0.3059, 0.3059,  ..., 0.3059, 0.3059, 0.3059]],

         [[0.2105, 0.2105, 0.2105,  ..., 0.2105, 0.2105, 0.2105],
          [0.2105, 0.2105, 0.2105,  ..., 0.2105, 0.2105, 0.2105],
          [0.2105, 0.2105, 0.2105,  ..., 0.2105, 0.2105, 0.2105],
          ...,
          [0.2105, 0.2105, 0.0660,  ..., 0.2105, 0.2105, 0.2105],
          [0.2105, 0.2105, 0.2357,  ..., 0.2105, 0.2105, 0.2105],
          [0.2105, 0.2105, 0.2105,  ..., 0.2105, 0.2105, 0.2105]],

         [[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0

In [29]:
# (batch, channels, height, width): 1 single image, 6 feature maps (one per filter we asked for),
# and 28 -> 26 because the 3x3 kernel without padding trims one pixel from each side
x.shape

torch.Size([1, 6, 26, 26])

In [32]:
# Pass through the pooling layer: kernel (filter) of 2 and stride of 2.
# A stride of 2 slides the window 2 pixels at a time, which halves the width
# and height of the output feature map.
x = F.max_pool2d(x, kernel_size=2, stride=2)


In [33]:
x.shape # pooling halves height and width: 26 / 2 = 13

torch.Size([1, 6, 13, 13])

In [34]:
# Do our second convolutional layer (6 input channels -> 16 filters), followed by ReLU
x = F.relu(conv2(x))

In [35]:
x.shape # Again, we didn't set padding, so the 3x3 kernel trims one pixel per side: 13 -> 11 (2 pixels lost per dimension)

torch.Size([1, 16, 11, 11])

In [36]:
# Second pooling layer: same 2x2 window moved with a stride of 2
x = F.max_pool2d(x, kernel_size=2, stride=2)

In [37]:
x.shape # 11 / 2 = 5.5, but the size is rounded down to 5 because you can't invent data to round up

torch.Size([1, 16, 5, 5])

In [38]:
# Same size arithmetic by hand, starting from 28: conv (-2), pool (/2), conv (-2), pool (/2).
# Plain division yields 5.5, whereas the pooled tensor has size 5 (rounded down).
((28-2)/2-2)/2

5.5