<a href="https://colab.research.google.com/github/R12942159/NTU_DLCV/blob/main/2023ML_HW2_CNN_myself.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## ML HW2 sample code

若有問題可直接寄信至助教信箱，環境部分推薦使用前述所提到之環境，若使用自己的電腦執行，請注意環境的相容性


#### Import packages

In [1]:
import os
import random
import glob
import csv
import torch
import torch.nn as nn
import numpy as np
import pandas as pd

from torch.optim import Adam
from torchvision import transforms as tr
from torch.utils.data import DataLoader, Dataset
from PIL import Image

In [None]:
!gdown 1drrS7gnyzUJPPiQcDWcHdIXqzjy2n3yZ
!unzip 'HW2.zip'

In [3]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using: {device}")

Using: cuda


In [4]:
import os
import random


# search file paths
def get_img_paths(path):
  paths = [os.path.join(path, i) for i in os.listdir(path) if i.endswith('.jpg')]
  return paths

train_paths = sorted(get_img_paths('/content/data/train'))
test_paths = sorted(get_img_paths('/content/data/test'))

In [5]:
import pandas as pd

img_labels = pd.read_csv('/content/data/train.csv')['label'].values.tolist()

In [6]:
from PIL import Image


IMG_SIZE = Image.open(train_paths[10]).convert('RGB')
IMG_SIZE = int(np.array(IMG_SIZE).shape[1])

In [None]:
import torch
import re
from torchvision.transforms import v2
from torchvision.transforms.functional import hflip, vflip


class Hw2_train_ds(torch.utils.data.Dataset):
    def __init__(self, img_path, img_label, transform, augmentation=True) -> None:
        self.img_path = img_path
        self.img_label = img_label
        self.transform = transform
        self.augmentation = augmentation

        def origine(x): return x
        if augmentation:
            self.augmentation = [origine, hflip, vflip]
        else:
            self.augmentation = [origine]

    def __len__(self):
        return len(self.img_path)

    def __getitem__(self, idx):
        # read img
        path = self.img_path[idx]
        img = Image.open(path).convert('RGB') # img = (64,64,3)

        # transform/normalize img
        img = self.transform(img)

        # augmentation img
        augment = random.choice(self.augmentation)
        img = augment(img)

        # get label
        label = self.img_label[idx]
        label = torch.tensor(label)

        return img, label

class Hw2_test_ds(torch.utils.data.Dataset):
    def __init__(self, img_path, transform):
        self.img_path = img_path
        self.transform = transform

    def __getitem__(self, idx):
        path = self.img_path[idx]
        img = Image.open(path).convert('RGB')
        img = self.transform(img)

        return img, re.split(r'[./]', path)[-2]

In [8]:
mean=[0.485, 0.456, 0.406]
std=[0.229, 0.224, 0.225]

img_ds = Hw2_train_ds(train_paths,
                      img_labels,
                      transform = tr.Compose([
                          tr.ToTensor(),
                          tr.Normalize(mean=mean, std=std),
                          ]),
                      augmentation = True,
                      )

test_ds = Hw2_test_ds(test_paths,
                     transform = tr.Compose([
                          tr.ToTensor(),
                          tr.Normalize(mean=mean, std=std),
                          ]),
                      )

In [9]:
from torch.utils.data import DataLoader, random_split


# Randomly divided into a training and validation dataset.
dataset_size = len(img_ds)
val_size = int(0.2 * dataset_size)
train_ds, val_ds = random_split(img_ds, [dataset_size - val_size, val_size])

In [10]:
# Build dataloders
BATCH_SIZE = 64

train_loader = torch.utils.data.DataLoader(train_ds, BATCH_SIZE, shuffle=True, num_workers=4, pin_memory=True)
val_loader = torch.utils.data.DataLoader(val_ds, BATCH_SIZE*2, shuffle=False, num_workers=4, pin_memory=True)
test_loader = torch.utils.data.DataLoader(test_ds, BATCH_SIZE, shuffle=False, num_workers=4, pin_memory=True)



In [11]:
class ConvBlock(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, padding='same'):
        super(ConvBlock, self).__init__()

        self.convblock = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding='same'),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels, kernel_size, stride=1, padding='same'),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        )

    def __call__(self, x):
        return self.convblock(x)

class CNN_scratch(nn.Module):
    def __init__(self, num_classes=7, in_channels=3, init_channels=16, size=IMG_SIZE) -> None:
        super(CNN_scratch, self).__init__()

        # conv
        self.encoder1 = ConvBlock(in_channels, init_channels) # (3, H, W) -> (16, H, W)
        self.pooling1 = nn.MaxPool2d(2)
        self.encoder2 = ConvBlock(init_channels, init_channels * 2) # (16, H/2, W/2) -> (32, H/2, W/2)
        self.pooling2 = nn.MaxPool2d(2)
        self.encoder3 = ConvBlock(init_channels * 2, init_channels * 4) # (32, H/4, W/4) -> (64, H/4, W/4)
        self.pooling3 = nn.MaxPool2d(2)
        self.encoder4 = ConvBlock(init_channels * 4, init_channels * 8) # (64, H/8, W/8) -> (128, H/8, W/8)

        self.flatten = nn.Flatten()

        self.classifier = nn.Sequential(
            nn.Linear(init_channels * 8 * (size//8) ** 2, init_channels * 4),
            nn.ReLU(),
            nn.Linear(init_channels * 4, num_classes)
        )

    def forward(self, x):
        x = self.encoder1(x)
        x = self.encoder2(self.pooling1(x))
        x = self.encoder3(self.pooling2(x))
        x = self.encoder4(self.pooling3(x))
        x = self.flatten(x)
        x = self.classifier(x)

        return x

#### Build Model

In [12]:
model = CNN_scratch().to(device)
print(model)

CNN_scratch(
  (encoder1): ConvBlock(
    (convblock): Sequential(
      (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=same)
      (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
      (3): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=same)
      (4): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): ReLU()
    )
  )
  (pooling1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (encoder2): ConvBlock(
    (convblock): Sequential(
      (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=same)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
      (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=same)
      (4): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): ReLU()
    )
  )
  (pooling2): MaxPool2d(kernel_size

#### Model Summary

In [13]:
from torchsummary import summary


summary(model, (3, IMG_SIZE, IMG_SIZE))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 64, 64]             448
       BatchNorm2d-2           [-1, 16, 64, 64]              32
              ReLU-3           [-1, 16, 64, 64]               0
            Conv2d-4           [-1, 16, 64, 64]           2,320
       BatchNorm2d-5           [-1, 16, 64, 64]              32
              ReLU-6           [-1, 16, 64, 64]               0
         MaxPool2d-7           [-1, 16, 32, 32]               0
            Conv2d-8           [-1, 32, 32, 32]           4,640
       BatchNorm2d-9           [-1, 32, 32, 32]              64
             ReLU-10           [-1, 32, 32, 32]               0
           Conv2d-11           [-1, 32, 32, 32]           9,248
      BatchNorm2d-12           [-1, 32, 32, 32]              64
             ReLU-13           [-1, 32, 32, 32]               0
        MaxPool2d-14           [-1, 32,

#### Define training and testing process

In [14]:
from tqdm.auto import tqdm # (optional) progress bar


def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset) # number of samples
    num_batches = len(dataloader) # batches per epoch

    model.train() # to training mode.
    epoch_loss, epoch_correct = 0, 0
    for batch_i, (x, y) in enumerate(tqdm(dataloader, leave=False)):
        x, y = x.to(device), y.to(device) # move data to GPU

        pred = model(x)
        loss = loss_fn(pred, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # write to logs
        epoch_loss += loss.item() # tensor -> python value
        # (N, Class)
        epoch_correct += (pred.argmax(dim=1) == y).sum().item()

    # return avg loss of epoch, acc of epoch
    return epoch_loss/num_batches, epoch_correct/size


def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset) # number of samples
    num_batches = len(dataloader) # batches per epoch

    model.eval() # model to test mode.
    epoch_loss, epoch_correct = 0, 0

    # No gradient for test data
    with torch.no_grad():
        for batch_i, (x, y) in enumerate(dataloader):
            x, y = x.to(device), y.to(device)

            pred = model(x)
            loss = loss_fn(pred, y)

            # write to logs
            epoch_loss += loss.item()
            epoch_correct += (pred.argmax(1) == y).sum().item()

    return epoch_loss/num_batches, epoch_correct/size

In [15]:
EPOCHS = 300

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)

logs = {
    'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': []
}

# early stopping
patience = 20
counter = 0
best_loss = np.inf

for epoch in tqdm(range(EPOCHS)):
    train_loss, train_acc = train(train_loader, model, loss_fn, optimizer)
    val_loss, val_acc = test(val_loader, model, loss_fn)

    print(f'EPOCH: {(epoch+1):04d} \
    train_loss: {train_loss:.4f}, train_acc: {train_acc:.3f} \
    val_loss: {val_loss:.4f}, val_acc: {val_acc:.3f} ')

    logs['train_loss'].append(train_loss)
    logs['train_acc'].append(train_acc)
    logs['val_loss'].append(val_loss)
    logs['val_acc'].append(val_acc)

    if epoch % 5 == 0:
        torch.save(model.state_dict(), f'DL_hw2_epoch{epoch+1}_model.pth')
    # chcek improvement
    if val_loss <  best_loss:
        couunter = 0
        best_loss = val_loss
        torch.save(model.state_dict(), 'DL_hw2_best_model.pth')
        torch.save(optimizer.state_dict(), 'DL_hw2_best_optimizer.pth')
        print('-------------------- Model Save --------------------')
    else:
        counter += 1
    if counter >= patience:
        print('-------------------- Early Stopping --------------------')
        break

  0%|          | 0/300 [00:00<?, ?it/s]



  0%|          | 0/324 [00:00<?, ?it/s]

EPOCH: 0001     train_loss: 1.5870, train_acc: 0.376     val_loss: 1.4629, val_acc: 0.426 
-------------------- Model Save --------------------


  0%|          | 0/324 [00:00<?, ?it/s]

EPOCH: 0002     train_loss: 1.3639, train_acc: 0.478     val_loss: 1.3753, val_acc: 0.464 
-------------------- Model Save --------------------


  0%|          | 0/324 [00:00<?, ?it/s]

EPOCH: 0003     train_loss: 1.2646, train_acc: 0.514     val_loss: 1.3535, val_acc: 0.475 
-------------------- Model Save --------------------


  0%|          | 0/324 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x79db8bf46320>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1478, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1461, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x79db8bf46320>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1478, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1461, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/

EPOCH: 0004     train_loss: 1.2004, train_acc: 0.540     val_loss: 1.3764, val_acc: 0.475 


  0%|          | 0/324 [00:00<?, ?it/s]

EPOCH: 0005     train_loss: 1.1429, train_acc: 0.566     val_loss: 1.2817, val_acc: 0.511 
-------------------- Model Save --------------------


  0%|          | 0/324 [00:00<?, ?it/s]

EPOCH: 0006     train_loss: 1.0908, train_acc: 0.586     val_loss: 1.1915, val_acc: 0.543 
-------------------- Model Save --------------------


  0%|          | 0/324 [00:00<?, ?it/s]

EPOCH: 0007     train_loss: 1.0509, train_acc: 0.603     val_loss: 1.1959, val_acc: 0.549 


  0%|          | 0/324 [00:00<?, ?it/s]

EPOCH: 0008     train_loss: 1.0139, train_acc: 0.618     val_loss: 1.2439, val_acc: 0.528 


  0%|          | 0/324 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x79db8bf46320>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1478, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1461, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x79db8bf46320>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1478, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1461, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/

EPOCH: 0009     train_loss: 0.9792, train_acc: 0.633     val_loss: 1.2706, val_acc: 0.540 


  0%|          | 0/324 [00:00<?, ?it/s]

EPOCH: 0010     train_loss: 0.9400, train_acc: 0.648     val_loss: 1.1982, val_acc: 0.549 


  0%|          | 0/324 [00:00<?, ?it/s]

EPOCH: 0011     train_loss: 0.9104, train_acc: 0.661     val_loss: 1.1994, val_acc: 0.566 


  0%|          | 0/324 [00:00<?, ?it/s]

EPOCH: 0012     train_loss: 0.8812, train_acc: 0.674     val_loss: 1.1805, val_acc: 0.563 
-------------------- Model Save --------------------


  0%|          | 0/324 [00:00<?, ?it/s]

EPOCH: 0013     train_loss: 0.8375, train_acc: 0.691     val_loss: 1.2434, val_acc: 0.559 


  0%|          | 0/324 [00:00<?, ?it/s]

EPOCH: 0014     train_loss: 0.8072, train_acc: 0.703     val_loss: 1.2677, val_acc: 0.538 


  0%|          | 0/324 [00:00<?, ?it/s]

EPOCH: 0015     train_loss: 0.7706, train_acc: 0.717     val_loss: 1.2286, val_acc: 0.566 


  0%|          | 0/324 [00:00<?, ?it/s]

EPOCH: 0016     train_loss: 0.7371, train_acc: 0.731     val_loss: 1.2712, val_acc: 0.553 


  0%|          | 0/324 [00:00<?, ?it/s]

EPOCH: 0017     train_loss: 0.7022, train_acc: 0.745     val_loss: 1.2342, val_acc: 0.575 


  0%|          | 0/324 [00:00<?, ?it/s]

EPOCH: 0018     train_loss: 0.6647, train_acc: 0.760     val_loss: 1.3077, val_acc: 0.556 


  0%|          | 0/324 [00:00<?, ?it/s]

EPOCH: 0019     train_loss: 0.6311, train_acc: 0.772     val_loss: 1.2657, val_acc: 0.570 


  0%|          | 0/324 [00:00<?, ?it/s]

Exception ignored in: Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x79db8bf46320><function _MultiProcessingDataLoaderIter.__del__ at 0x79db8bf46320>

Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1478, in __del__
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1478, in __del__
        self._shutdown_workers()self._shutdown_workers()Exception ignored in: 
<function _MultiProcessingDataLoaderIter.__del__ at 0x79db8bf46320>  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1461, in _shutdown_workers

    if w.is_alive():
Exception ignored in: 
  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1461, in _shutdown_workers
    assert self._parent_pid == os.getpid(), 'can

EPOCH: 0020     train_loss: 0.5964, train_acc: 0.786     val_loss: 1.3282, val_acc: 0.566 


  0%|          | 0/324 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x79db8bf46320>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1478, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1461, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x79db8bf46320>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1478, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1461, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/

EPOCH: 0021     train_loss: 0.5684, train_acc: 0.798     val_loss: 1.3783, val_acc: 0.549 


  0%|          | 0/324 [00:00<?, ?it/s]

EPOCH: 0022     train_loss: 0.5327, train_acc: 0.812     val_loss: 1.3832, val_acc: 0.559 


  0%|          | 0/324 [00:00<?, ?it/s]

EPOCH: 0023     train_loss: 0.5045, train_acc: 0.823     val_loss: 1.4050, val_acc: 0.572 


  0%|          | 0/324 [00:00<?, ?it/s]

EPOCH: 0024     train_loss: 0.4676, train_acc: 0.835     val_loss: 1.4500, val_acc: 0.547 


  0%|          | 0/324 [00:00<?, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x79db8bf46320>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1478, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1461, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x79db8bf46320>
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x79db8bf46320>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1478, in __del__
Traceback (most recent call last):
      File "/usr/local/lib/python3.10/dist-packages/tor

EPOCH: 0025     train_loss: 0.4375, train_acc: 0.845     val_loss: 1.5761, val_acc: 0.549 


  0%|          | 0/324 [00:00<?, ?it/s]

EPOCH: 0026     train_loss: 0.4248, train_acc: 0.850     val_loss: 1.5905, val_acc: 0.550 
-------------------- Early Stopping --------------------


In [16]:
def test(test_loader, model, file_name='predict.csv'):
    with torch.no_grad():
        predict_result = []
        predict_name = []
        for img, name in test_loader:
            img = img.to(device)
            pred = model(img)
            predict = torch.argmax(pred, dim=-1).tolist()
            predict_result += predict
            predict_name += name

    with open(file_name, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['id', 'label'])
        for id, r in zip(predict_name, predict_result):
            writer.writerow([id, r])

In [17]:
del model
model = CNN_scratch()
model.load_state_dict(torch.load('DL_hw2_best_model.pth'))
model = model.to(device)
test(test_loader, model)