<h1 align="center">Convolutional Neural Network (CNN) for Simpson Image Recognition</h1>

<h3>Prepare the Image Folders and Show Samples in the Unknown Folder</h3>
<pre>
images
    0: homer_simpson
    1: marge_simpson
    u: (unknown)
</pre>


In [None]:
import random
import shutil
from pathlib import Path

# Source folder: one sub-folder per character (e.g. simpson/homer_simpson)
SRC_DIR = Path("simpson")
# Destination layout: images/<class index>/ for training, images/u/ for held-out samples
DEST_DIR = Path("images")
UNKNOWN_DIR = DEST_DIR / "u"

# Parameters
total_images_per_class = 53
unknown_images_per_class = 3
train_images_per_class = total_images_per_class - unknown_images_per_class
image_extensions = [".jpg", ".jpeg", ".png", ".bmp"]

# Fixed seed + sorted file lists make the train/unknown split reproducible
# across "Restart Kernel -> Run All".
RANDOM_SEED = 1
rng = random.Random(RANDOM_SEED)

# Create the destination folders
DEST_DIR.mkdir(exist_ok=True)
UNKNOWN_DIR.mkdir(parents=True, exist_ok=True)

# Only real directories are candidate classes. The class index is advanced
# only for classes that are actually exported, so the numbered folders are
# always contiguous (0, 1, 2, ...) even when SRC_DIR contains stray files or
# a class is skipped for having too few images.
class_folders = sorted(p for p in SRC_DIR.iterdir() if p.is_dir())

idx = 0
for class_folder in class_folders:
    # Sort before shuffling: directory listing order is not guaranteed
    image_files = sorted(
        f
        for f in class_folder.iterdir()
        if f.is_file() and f.suffix.lower() in image_extensions
    )

    if len(image_files) < total_images_per_class:
        print(f"{class_folder.name} 圖片不足 {total_images_per_class} 張，跳過")
        continue

    rng.shuffle(image_files)
    selected_images = image_files[:total_images_per_class]
    unknown_images = selected_images[:unknown_images_per_class]
    train_images = selected_images[unknown_images_per_class:]

    # Class folder named after the numeric class index
    class_dir = DEST_DIR / str(idx)
    class_dir.mkdir(parents=True, exist_ok=True)

    # Copy training images, renamed to <class_name>_000.jpg, _001.jpg, ...
    # NOTE: the ".jpg" suffix is applied regardless of the source format;
    # the bytes are copied unchanged.
    for i, img in enumerate(train_images):
        new_name = f"{class_folder.name}_{i:03d}.jpg"
        shutil.copy(img, class_dir / new_name)

    # Copy held-out images into the unknown folder, numbered idx*10 + k
    # (class 0 -> 00, 01, ...; class 1 -> 10, 11, ...)
    for u_idx, img in enumerate(unknown_images):
        number = idx * 10 + u_idx
        new_name = f"{class_folder.name}_{number:02d}.jpg"
        shutil.copy(img, UNKNOWN_DIR / new_name)

    print(
        f"✔ {class_folder.name} → class {idx}: train= {len(train_images)}, unknown={len(unknown_images)}"
    )

    # Advance the class index only after a class was exported
    idx += 1

print("全部圖片重新命名與分類完成！")

✔ homer_simpson → class 0: train= 50, unknown=3
✔ marge_simpson → class 1: train= 50, unknown=3
✅ 全部圖片重新命名與分類完成！


In [61]:
# Show Samples in the Unknown Folder

import numpy as np
import cv2
import glob

def show_sample(img):
    """Display an image in an OpenCV window until SPACEBAR is pressed.

    The image is scaled to floating point, resized to 256x256 for viewing
    and shown in a window named 'Sample'. The function blocks until the
    user presses SPACEBAR or closes the window.

    Args:
        img: image array with pixel values in the 0-255 range
            (as returned by cv2.imread).

    Raises:
        ValueError: if img is None (cv2.imread returns None when a file
            cannot be read).
    """
    if img is None:
        raise ValueError("show_sample: img is None (unreadable image file?)")
    # cv2.imshow expects floating-point pixel values in the range [0.0, 1.0]
    img = img / 255.0
    # Resize to 256x256 so small samples are easy to look at
    img256 = cv2.resize(img, (256, 256))
    cv2.namedWindow('Sample')
    cv2.imshow('Sample', img256)
    try:
        while True:
            # Mask to the low byte: some platforms set high bits in the key code
            c = cv2.waitKey(500) & 0xFF
            if c == ord(' '):
                break
            # Stop as well when the window was closed with the mouse;
            # otherwise this loop would never terminate.
            if cv2.getWindowProperty('Sample', cv2.WND_PROP_VISIBLE) < 1:
                break
    finally:
        # Always release the window, even when the kernel is interrupted
        cv2.destroyAllWindows()
    return

# List the unknown folder in sorted order so samples are shown deterministically
ilst = sorted(glob.glob('images/u/*.*'))

for f in ilst:
    print(f)
    img = cv2.imread(f)
    # cv2.imread returns None (no exception) for files it cannot decode;
    # '*.*' may match such files, so skip them instead of crashing the viewer.
    if img is None:
        print('  skipped: not a readable image')
        continue
    show_sample(img)

images/u\homer_simpson_00.jpg
images/u\homer_simpson_01.jpg
images/u\homer_simpson_02.jpg
images/u\marge_simpson_10.jpg
images/u\marge_simpson_11.jpg
images/u\marge_simpson_12.jpg


<hr>
<h3>Load all training samples of homer_simpson (0) & marge_simpson (1)</h3>
<p>image_size = <strong style="color:red">64</strong></p>
<pre>
images: all images (64x64x3)
labels: all labels (2)
</pre>

In [None]:
# Load all training samples of homer_simpson (0) & marge_simpson (1)

import numpy as np
import cv2
import glob

# Normalized size of sample images
image_size = 64
# Class index (labels): 0 and 1; (0 for homer_simpson & 1 for marge_simpson)
class_name = [ 0, 1 ]
class_num = len(class_name)

# Read every file of every class folder exactly once (sorted for a stable
# order). Files that OpenCV cannot decode are skipped: '*.*' may also match
# non-image files, for which cv2.imread returns None.
loaded = []
for c in class_name:
    for f in sorted(glob.glob('images/%d/*.*' % c)):
        img = cv2.imread(f)
        if img is None:
            print('Skipped unreadable file: %s' % f)
            continue
        loaded.append((cv2.resize(img, (image_size, image_size)), c))

# Number of usable samples
sample_n = len(loaded)

# images: (sample_n, image_size, image_size, 3); labels: one-hot (sample_n, class_num)
images = np.zeros([sample_n, image_size, image_size, 3])
labels = np.zeros([sample_n, class_num])

for i, (img, c) in enumerate(loaded):
    images[i, :, :, :] = img
    labels[i, c] = 1

print('Samples are ready. (n=%d)' % (sample_n))


Samples are ready. (n=100)


<hr>
<h3>Basic Parameters</h3>

In [63]:
# Basic Parameters

# 載入 modules

import torch
import torch.nn as nn

# Seed PyTorch's random number generator so runs are reproducible
torch.manual_seed(1)

# ---- Basic parameters ----

# Learning rate (LR)
LR = 0.001
# Number of training epochs
EPOCH = 1000
# Number of colour channels (RGB)
CHANNEL = 3
# Total number of samples
N = sample_n
# Images are IMAGE_SIZE x IMAGE_SIZE pixels
IMAGE_SIZE = image_size


<hr>
<h3>CNN 定義</h3>
<pre>
<span style="color:red">CNN for 64x64 color images (RGB) (Homer vs. Marge Simpson)</span>
<span style="color:red">4 convolutional layers</span>
</pre>

In [64]:
# CNN 定義

class CNN(nn.Module):
    """Four-stage convolutional classifier for square RGB images.

    Each convolutional stage is Conv2d(5x5, stride 1, padding 2) -> ReLU ->
    MaxPool2d(2), so the spatial size is halved four times. The flattened
    features go through dropout and three fully connected layers.

    Args:
        image_size: side length of the (square) input images. Defaults to
            the module-level IMAGE_SIZE when omitted.
        num_classes: number of output units of the last layer (default 2).

    Raises:
        ValueError: if image_size is smaller than 16, which would leave no
            features after the four pooling steps.
    """

    def __init__(self, image_size=None, num_classes=2):
        super().__init__()
        if image_size is None:
            image_size = IMAGE_SIZE
        # Four 2x2 poolings, each flooring: final side length is image_size // 16
        feature_side = int(image_size) // 16
        if feature_side < 1:
            raise ValueError("image_size must be at least 16, got %r" % (image_size,))

        # Convolutional Layer 1
        # (shape comments below assume image_size = 64)
        self.conv1 = nn.Sequential(
            # input shape (3, 64, 64)
            nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5, stride=1, padding=2),
            # output shape (16, 64, 64)
            nn.ReLU(),
            # 2x2 pooling, output shape (16, 32, 32)
            nn.MaxPool2d(kernel_size=2)
        )
        # Convolutional Layer 2
        self.conv2 = nn.Sequential(
            # input shape (16, 32, 32)
            nn.Conv2d(16, 32, 5, 1, 2),
            # output shape (32, 32, 32)
            nn.ReLU(),
            # 2x2 pooling, output shape (32, 16, 16)
            nn.MaxPool2d(2)
        )
        # Convolutional Layer 3
        self.conv3 = nn.Sequential(
            # input shape (32, 16, 16)
            nn.Conv2d(32, 64, 5, 1, 2),
            # output shape (64, 16, 16)
            nn.ReLU(),
            # 2x2 pooling, output shape (64, 8, 8)
            nn.MaxPool2d(2)
        )
        # Convolutional Layer 4
        self.conv4 = nn.Sequential(
            # input shape (64, 8, 8)
            nn.Conv2d(64, 128, 5, 1, 2),
            # output shape (128, 8, 8)
            nn.ReLU(),
            # 2x2 pooling, output shape (128, 4, 4)
            nn.MaxPool2d(2)
        )
        # Dropout (default p=0.5) on the flattened features to reduce overfitting
        self.drop_out = nn.Dropout()
        # Fully connected layer 1 (IN=128*4*4=2048 for 64x64 input, OUT=256)
        self.fc1 = nn.Sequential(
            nn.Linear(128 * feature_side * feature_side, 256),
            nn.ReLU()
        )
        # Fully connected layer 2 (IN=256, OUT=16)
        self.fc2 = nn.Sequential(
            nn.Linear(256, 16),
            nn.ReLU()
        )
        # Fully connected layer 3 (IN=16, OUT=num_classes), raw scores without activation
        self.fc3 = nn.Sequential(
            nn.Linear(16, num_classes)
        )

    def forward(self, x):
        """Return raw class scores of shape (batch_size, num_classes) for a batch x."""
        net = self.conv1(x)
        net = self.conv2(net)
        net = self.conv3(net)
        net = self.conv4(net)
        # Flatten everything except the batch dimension: (batch_size, 128 * 4 * 4)
        net = net.flatten(1)
        net = self.drop_out(net)
        net = self.fc1(net)
        net = self.fc2(net)
        output = self.fc3(net)
        return output

# Instantiate the CNN
cnn = CNN()

# Print the network architecture (layer-by-layer module listing)
print(cnn)


CNN(
  (conv1): Sequential(
    (0): Conv2d(3, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv3): Sequential(
    (0): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv4): Sequential(
    (0): Conv2d(64, 128, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (drop_out): Dropout(p=0.5, inplace=False)
  (fc1): Sequential(
    (0): Linear(in_features=2048, out_features=256, bias=True)
    (1): ReLU()
  )
  (fc2): Sequential(
    (0): Linear(in

<hr>
<h3>改變影像資料的維度排列方式，配合 CNN 所需的維度</h3>

In [65]:
# Rearrange the image array axes into the layout the CNN expects

# Original image set: count x height x width x channels
print(images.shape)

# Work on a copy so the original array is left untouched
images_for_cnn = images.copy()

print(images_for_cnn.shape)

# Move the channel axis in front of height and width in one step
# (same result as swapping axes 2<->3 and then 1<->2)
images_for_cnn = np.transpose(images_for_cnn, (0, 3, 1, 2))

# New image set: count x channels x height x width
print(images_for_cnn.shape)


(100, 64, 64, 3)
(100, 64, 64, 3)
(100, 3, 64, 64)


<hr>
<h3>影像集 ndarray 格式轉換為 Tensor 格式</h3>

In [66]:
# Convert the image set and the labels from ndarray to float32 tensors

x = torch.from_numpy(images_for_cnn).to(torch.float32)
y = torch.from_numpy(labels).to(torch.float32)


In [67]:
# Inspect the tensors: both shapes first, then the first image and its label
for item in (x.shape, y.shape, x[0], y[0]):
    print(item)

torch.Size([100, 3, 64, 64])
torch.Size([100, 2])
tensor([[[ 36.,  35.,  37.,  ...,  16.,  23.,  24.],
         [ 22.,  15.,  43.,  ...,   0.,  25.,  14.],
         [ 36.,  38.,  27.,  ...,  29.,  29.,  25.],
         ...,
         [ 26.,  25.,  22.,  ...,  16.,  21.,  22.],
         [ 24.,  27.,  10.,  ...,  22.,  30.,  26.],
         [ 19.,  22.,  10.,  ...,  17.,  25.,  21.]],

        [[ 42.,  42.,  44.,  ...,  58.,  59.,  60.],
         [ 28.,  22.,  50.,  ...,  26.,  58.,  49.],
         [ 40.,  44.,  43.,  ...,  48.,  48.,  44.],
         ...,
         [ 28.,  28.,  34.,  ...,  35.,  34.,  34.],
         [ 27.,  32.,  29.,  ...,  41.,  44.,  40.],
         [ 22.,  27.,  29.,  ...,  36.,  39.,  35.]],

        [[117., 118., 129.,  ..., 145., 138., 138.],
         [103.,  98., 135.,  ..., 115., 138., 128.],
         [121., 125., 126.,  ..., 121., 121., 117.],
         ...,
         [ 99.,  99., 106.,  ..., 103., 104., 104.],
         [102., 107., 102.,  ..., 109.,  99.,  93.],
   

<hr>
<h3>Move to GPU if CUDA Available</h3>

In [68]:
# Use the first CUDA GPU when one is available, otherwise stay on the CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

# Move the model and both tensors to the selected device
cnn.to(device)

x = x.to(device)
y = y.to(device)


cpu


<hr>
<h3>CNN Model Training</h3>

In [69]:
# CNN model training

# cnn training
def cnn_training(cnn, test_x, test_y, epochs=None, lr=None):
    """Train a model on the given samples with full-batch Adam and a summed MSE loss.

    The samples passed as arguments are used for training (previously the
    arguments were ignored and the module-level x / y were used instead).
    Every 10th epoch the loss divided by the number of samples is printed.

    Args:
        cnn: the torch module to train; it is updated in place.
        test_x: input tensor, first dimension is the sample index.
        test_y: target tensor with the same shape as the model output.
        epochs: last epoch index; training runs epochs + 1 steps.
            Defaults to the module-level EPOCH.
        lr: Adam learning rate. Defaults to the module-level LR.

    Returns:
        The same model object, after training.

    Raises:
        ValueError: if test_x contains no samples.
    """
    if epochs is None:
        epochs = EPOCH
    if lr is None:
        lr = LR
    n_samples = test_x.size(0)
    if n_samples == 0:
        raise ValueError("cnn_training: no training samples were given")

    # Summed mean-squared-error loss (used here instead of cross entropy)
    loss_func = torch.nn.MSELoss(reduction='sum')
    # Optimize all model parameters
    optimizer = torch.optim.Adam(cnn.parameters(), lr=lr)
    # Make sure dropout is active even if the model was put in eval mode earlier
    cnn.train()
    for epoch in range(epochs + 1):
        # Shuffle the samples
        indices = torch.randperm(n_samples)
        new_x, new_y = test_x[indices], test_y[indices]
        # Forward pass
        output = cnn(new_x)
        # MSE loss between the model output and the targets
        loss = loss_func(output, new_y)
        # Report the per-sample loss every 10 epochs
        if epoch % 10 == 0:
            err = loss.item() / n_samples
            print('%5d, %18.12f' % (epoch, err))
        # Clear gradients for this training step
        optimizer.zero_grad()
        # Backpropagation, compute gradients
        loss.backward()
        # Apply gradients
        optimizer.step()
    return cnn

# Train on the full sample set
cnn = cnn_training(cnn, x, y)

# Persist only the learned weights (the state dict), not the module object
trained_weights = cnn.state_dict()
torch.save(trained_weights, 'cnn_simpson.model')


    0,     2.401342315674
   10,     0.531620712280
   20,     0.511378936768
   30,     0.506932106018
   40,     0.503997497559
   50,     0.506200523376
   60,     0.501744346619
   70,     0.490384445190
   80,     0.472886047363
   90,     0.403548774719
  100,     0.232901611328
  110,     0.106919670105
  120,     0.053686437607
  130,     0.025192723274
  140,     0.021510856152
  150,     0.013190878630
  160,     0.013712807894
  170,     0.013665069342
  180,     0.012194274664
  190,     0.008483516574
  200,     0.009896321297
  210,     0.008644857407
  220,     0.006964403391
  230,     0.008948934078
  240,     0.006407318711
  250,     0.007596824169
  260,     0.008185850382
  270,     0.005581636429
  280,     0.006908971667
  290,     0.006295160055
  300,     0.004879609048
  310,     0.005585486889
  320,     0.007198413610
  330,     0.004789264500
  340,     0.004968159497
  350,     0.004810029864
  360,     0.009732160568
  370,     0.006639716625
  380,     0

<hr>
<h3>Load CNN Model of Simpson samples</h3>

In [70]:
# Load CNN Model of Simpson samples

def load_cnn_model(model=None, model_path='cnn_simpson.model'):
    """Load saved weights into a model and switch it to evaluation mode.

    Args:
        model: module that receives the weights. Defaults to the
            module-level `cnn` when omitted.
        model_path: path of the state-dict file written by torch.save.

    Returns:
        None. The model is modified in place.
    """
    if model is None:
        model = cnn
    # NOTE(security): torch.load unpickles the file; only load model files
    # from a trusted source.
    # map_location='cpu' lets weights saved on a GPU be loaded on any machine;
    # load_state_dict then copies them into the model's existing parameters.
    state = torch.load(model_path, map_location='cpu')
    model.load_state_dict(state)
    # Evaluation mode disables dropout for inference
    model.eval()
    print('Load previous cnn model completely!')
    return
    
# Restore the saved weights into `cnn` and put it in evaluation mode
load_cnn_model()


Load previous cnn model completely!


<hr>
<h3>Test Unknown Samples</h3>

In [74]:
import torch
import glob
import cv2
import numpy as np

# Map each class index to its character name
label_dict = dict(enumerate(('homer_simpson', 'marge_simpson')))

def cnn_testing():
    """Classify every image in images/u with the module-level `cnn`.

    Each file is resized to image_size x image_size, rearranged to
    (channels, height, width) and passed through the network. The class
    with the highest output score is looked up in label_dict.

    Returns:
        A list of [file path, predicted class name] pairs, one per
        readable image, in sorted file order.
    """
    ilst = sorted(glob.glob('images/u/*.*'))
    res = []

    # Inference only: make sure dropout is disabled and skip gradient tracking
    cnn.eval()
    with torch.no_grad():
        for f in ilst:
            img = cv2.imread(f)
            # cv2.imread returns None for files it cannot decode
            if img is None:
                print(f"無法讀取圖片，跳過: {f}")
                continue
            img = cv2.resize(img, (image_size, image_size))

            # Rearrange (H, W, 3) -> (3, H, W)
            img = np.transpose(img, (2, 0, 1))

            # Build a float batch of one image on the model's device
            x = torch.from_numpy(img).float().contiguous().unsqueeze(0)
            x = x.to(device)

            # Predict: index of the highest output score (raw scores, not probabilities)
            output = cnn(x)
            pred_class = torch.argmax(output, dim=1).item()

            # Translate the class index into its name
            pred_label = label_dict[pred_class]
            res.append([f, pred_label])

    return res

if __name__ == '__main__':
    # Classify the unknown samples and report one line per image
    res = cnn_testing()
    for path, label in res:
        print(f"圖片: {path} → 預測類別: {label}")
        


圖片: images/u\homer_simpson_00.jpg → 預測類別: homer_simpson
圖片: images/u\homer_simpson_01.jpg → 預測類別: homer_simpson
圖片: images/u\homer_simpson_02.jpg → 預測類別: homer_simpson
圖片: images/u\marge_simpson_10.jpg → 預測類別: marge_simpson
圖片: images/u\marge_simpson_11.jpg → 預測類別: marge_simpson
圖片: images/u\marge_simpson_12.jpg → 預測類別: marge_simpson
