<a href="https://colab.research.google.com/github/TAKE-JP-17/Pytorch/blob/main/CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.utils import make_grid

import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Convert MNIST image files into tensors. Each image becomes a channel-first
# (color channels, height, width) tensor - 1 x 28 x 28 for MNIST - and a batch of them
# forms a 4-dimensional (number of images, color channels, height, width) tensor.
transform = transforms.ToTensor() # Transform each image into a torch tensor

In [None]:
# Train Data
train_data = datasets.MNIST(root='/cnn_data', train=True, download=True, transform=transform)
# 1. root: the root directory of the dataset.
# 2. '/cnn_data': the path where the dataset files are stored.
# 3. train=True selects the training set; train=False selects the test set.
# 4. download=True: if the dataset is not found locally, it is downloaded from the network and stored under root. If the dataset already exists, it is not downloaded again.

In [None]:
# Test Data: same root directory and transform as the training set; train=False selects the test split
test_data = datasets.MNIST(root='/cnn_data', train=False, download=True, transform=transform)

In [None]:
# Display the training dataset summary (number of datapoints, root location, split, transform)
train_data

Dataset MNIST
    Number of datapoints: 60000
    Root location: /cnn_data
    Split: Train
    StandardTransform
Transform: ToTensor()

In [None]:
# Display the test dataset summary (number of datapoints, root location, split, transform)
test_data

Dataset MNIST
    Number of datapoints: 10000
    Root location: /cnn_data
    Split: Test
    StandardTransform
Transform: ToTensor()

In [None]:
# Create a small batch size for images...let's say 10
train_loader = DataLoader(train_data, batch_size=10, shuffle=True) # shuffle=True: tells the DataLoader to randomly reorder the samples at the start of every epoch.
test_loader = DataLoader(test_data, batch_size=10, shuffle=False) # shuffle=False: the test samples are served in their stored order.

In [None]:
# Define our CNN Model
# Describe the convolutional layers and what they are doing (2 convolutional layers)
# This is just a standalone example; the actual model class is built further down
conv1 = nn.Conv2d(1,6,3,1) # 1 input channel, 6 output channels (filters), 3x3 kernel, stride of 1 (move one pixel at a time)
conv2 = nn.Conv2d(6,16,3,1) # 6 input channels (conv1's outputs), 16 output channels, 3x3 kernel, stride of 1

In [None]:
# Grab 1 MNIST record/image: indexing the dataset returns one (image, label) pair,
# so the first sample can be read directly instead of starting a loop and breaking out of it.
X_Train, y_train = train_data[0]

In [None]:
# Display the raw pixel values of the single image grabbed above
X_Train

tensor([[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
          0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,

In [None]:
X_Train.shape # Size of one MNIST image: 1 color channel, 28 x 28 pixels

torch.Size([1, 28, 28])

In [None]:
# Change the single 3D image (1, 28, 28) into a 4D batch: (1 image, 1 channel, 28 high, 28 wide)
x = X_Train.view(1,1,28,28)

In [None]:
# Perform our first convolution
# NOTE: this cell overwrites x. conv1 expects 1 input channel and produces 6, so re-running
# this cell without first re-running the cell that builds x from X_Train will fail.
x = F.relu(conv1(x)) # Rectified Linear Unit for our activation function

In [None]:
# Display the activations produced by the first convolution + ReLU
x

tensor([[[[0.2143, 0.2143, 0.2143,  ..., 0.2143, 0.2143, 0.2143],
          [0.2143, 0.2143, 0.2143,  ..., 0.2143, 0.2143, 0.2143],
          [0.2143, 0.2143, 0.2143,  ..., 0.2143, 0.2143, 0.2143],
          ...,
          [0.2143, 0.2143, 0.0330,  ..., 0.2143, 0.2143, 0.2143],
          [0.2143, 0.2143, 0.1570,  ..., 0.2143, 0.2143, 0.2143],
          [0.2143, 0.2143, 0.2143,  ..., 0.2143, 0.2143, 0.2143]],

         [[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          ...,
          [0.0000, 0.0000, 0.1952,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.1694,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]],

         [[0.2049, 0.2049, 0.2049,  ..., 0.2049, 0.2049, 0.2049],
          [0.2049, 0.2049, 0.2049,  ..., 0.2049, 0.2049, 0.2049],
          [0.2049, 0.2049, 0.2049,  ..., 0

In [None]:
# 1 single image, 6 is the number of filters we asked for, and 26 x 26 is the image size
# after a 3x3 kernel with no padding (28 - 3 + 1 = 26)
x.shape

torch.Size([1, 6, 26, 26])

In [None]:
# Pass through the pooling layer (this discards data by keeping only the max of each window)
x = F.max_pool2d(x,2,2) # kernel (filter) of 2 and stride of 2 (stride = 2 means the window slides 2 pixels at a time, which halves the width and height of the output feature map)


In [None]:
x.shape # 2x2 pooling with stride 2 halves each spatial dimension: 26 / 2 = 13

torch.Size([1, 6, 13, 13])

In [None]:
# Do our second convolutional layer (6 channels in, 16 channels out), followed by ReLU
x = F.relu(conv2(x))

In [None]:
x.shape # Again, we didn't set padding, so the 3x3 kernel loses 2 pixels per dimension: 13 - 2 = 11

torch.Size([1, 16, 11, 11])

In [None]:
# Second pooling layer: 2x2 kernel with stride 2, same as the first one
x = F.max_pool2d(x,2,2)

In [None]:
x.shape # 11 / 2 = 5.5, but the output size is rounded down to 5 because you can't invent data to round up

torch.Size([1, 16, 5, 5])

In [None]:
# Size arithmetic for the whole pipeline: 28 - 2 after conv1, / 2 after pooling,
# - 2 after conv2, / 2 after pooling. True division gives 5.5 here; the real feature map
# is 5 x 5 (see the shape printed above) because the last pooling step rounds down.
((28-2)/2-2)/2

5.5

In [None]:
# Model Class
class ConvolutionalNetwork(nn.Module):
  """Small CNN for single-channel 28x28 images with 10 output classes.

  Architecture: two (3x3 convolution -> ReLU -> 2x2 max-pool) stages followed by
  three fully connected layers (400 -> 120 -> 84 -> 10).
  """

  def __init__(self): # constructor
    super().__init__() # run nn.Module's own initialisation before registering any layers
    self.conv1 = nn.Conv2d(1,6,3,1)   # 1 input channel -> 6 filters, 3x3 kernel, stride 1
    self.conv2 = nn.Conv2d(6,16,3,1)  # 6 input channels -> 16 filters, 3x3 kernel, stride 1
    # Fully Connected Layers
    self.fc1 = nn.Linear(5*5*16, 120) # 16 feature maps of 5x5 flattened -> 120 neurons
    self.fc2 = nn.Linear(120, 84)
    self.fc3 = nn.Linear(84, 10)      # one output per digit class

  def forward(self,X):
    """Run a batch through the network.

    Args:
      X: float tensor of shape (batch_size, 1, 28, 28).

    Returns:
      Tensor of shape (batch_size, 10) holding log-probabilities for each class.

    Raises:
      RuntimeError: if the spatial size of X does not produce 400 features per
        sample (raised by fc1), instead of silently changing the batch size.
    """
    # First pass: 28x28 -> 26x26 (conv) -> 13x13 (pool)
    X = F.relu(self.conv1(X))
    X = F.max_pool2d(X,2,2) # 2*2 kernel and stride 2
    # Second pass: 13x13 -> 11x11 (conv) -> 5x5 (pool)
    X = F.relu(self.conv2(X))
    X = F.max_pool2d(X,2,2) # 2*2 kernel and stride 2

    # Flatten everything except the batch dimension: (batch_size, 16, 5, 5) -> (batch_size, 400).
    # Unlike view(-1, 400), this always keeps the batch size, so a wrongly sized input
    # fails at fc1 instead of being silently regrouped into a different number of rows.
    X = torch.flatten(X, 1)

    # Fully Connected Layers
    X = F.relu(self.fc1(X))
    X = F.relu(self.fc2(X))
    X = self.fc3(X)
    # dim=1 applies log_softmax across the class scores of each sample, turning the
    # (batch_size, num_classes) output into log-probabilities ready for the loss computation.
    return F.log_softmax(X, dim=1)


In [None]:
# Create an Instance of our Model
torch.manual_seed(41) # fix the random seed so the model's initial weights are reproducible
model = ConvolutionalNetwork()
model # display the layer summary

ConvolutionalNetwork(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [None]:
# Loss function and optimizer
# NOTE(review): the model's forward() already returns log_softmax output, and CrossEntropyLoss
# applies log_softmax again internally. Applying log_softmax to log-probabilities leaves them
# unchanged, so the loss value is still correct; nn.NLLLoss would be the conventional pairing.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),lr=0.001) # A smaller learning rate takes smaller steps, so training takes longer; it is usually more stable but is not guaranteed to give a better result.

In [None]:
import time
start_time = time.time()

# Create variables to track things
epochs = 5 # Train & test 5 times
log_every = 600 # print a progress line every `log_every` training batches
train_losses = []   # mean training loss per epoch (0-d tensors)
test_losses = []    # mean test loss per epoch (0-d tensors)
train_correct = []  # number of correct training predictions per epoch
test_correct = []   # number of correct test predictions per epoch

# For loop of epochs
for i in range(epochs):
  trn_corr = 0
  tst_corr = 0

  # Train
  model.train()
  batch_losses = []
  for b,(X_train,y_train) in enumerate(train_loader, start=1): # start our batches at 1
    y_pred = model(X_train) # get predicted values from the training set (images are not flattened here)
    loss = criterion(y_pred,y_train) # how off are we? Compare the predictions to the correct answers in y_train

    # argmax over dim 1 (the class dimension) gives, for every sample, the index of the
    # highest score, i.e. the class label the model predicts.
    predicted = y_pred.detach().argmax(dim=1)
    batch_corr = (predicted == y_train).sum() # how many we got correct from this batch. True=1, False=0, sum those up.
    trn_corr += batch_corr # keep track as we go along in training.

    # Update our parameters
    optimizer.zero_grad() # clear gradients left over from the previous batch
    loss.backward()       # back-propagate to compute fresh gradients
    optimizer.step()      # apply the gradient update to the weights

    batch_losses.append(loss.detach()) # detach so the autograd graph is not kept alive

    # Print out some results
    if b % log_every == 0:
      print(f'Epoch: {i}  Batch: {b}  Loss: {loss.item()}')

  train_losses.append(torch.stack(batch_losses).mean()) # mean of the per-batch losses
  train_correct.append(trn_corr)

  # Test
  model.eval()
  batch_losses = []
  with torch.no_grad(): # no gradients needed: the weights are not updated on test data
    for X_test,y_test in test_loader:
      y_val = model(X_test)
      batch_losses.append(criterion(y_val,y_test))
      predicted = y_val.argmax(dim=1)
      tst_corr += (predicted == y_test).sum()

  test_losses.append(torch.stack(batch_losses).mean()) # mean of the per-batch losses
  test_correct.append(tst_corr)


current_time = time.time()
total = current_time - start_time
print(f'Training Took: {total/60} minutes!')