<h1 align="center">Convotutional Neural Network (CNN) for simpson Image Recognition</h1>

<hr>
<h3>Show Samples in the Unknown Folder</h3>
<pre>
images
    0: cat
    1: dog
    u: (unknown)
</pre>

In [14]:
import random
import shutil
from pathlib import Path

# 原始 simpson 資料夾
SRC_DIR = Path("simpson")
DEST_DIR = Path("images")
UNKNOWN_DIR = DEST_DIR / "u"

# 設定參數
total_images_per_class = 50
unknown_images_per_class = 10
train_images_per_class = total_images_per_class - unknown_images_per_class
image_extensions = [".jpg", ".jpeg", ".png", ".bmp"]

# 建立資料夾
DEST_DIR.mkdir(exist_ok=True)
UNKNOWN_DIR.mkdir(parents=True, exist_ok=True)

# 開始處理每個類別資料夾
for idx, class_folder in enumerate(sorted(SRC_DIR.iterdir())):
    if not class_folder.is_dir():
        continue

    image_files = [
        f for f in class_folder.iterdir() if f.suffix.lower() in image_extensions
    ]

    if len(image_files) < total_images_per_class:
        print(f"⚠ {class_folder.name} 圖片不足 {total_images_per_class} 張，跳過")
        continue

    random.shuffle(image_files)
    selected_images = image_files[:total_images_per_class]
    unknown_images = selected_images[:unknown_images_per_class]
    train_images = selected_images[unknown_images_per_class:]

    # 建立類別資料夾（以數字命名）
    class_dir = DEST_DIR / str(idx)
    class_dir.mkdir(parents=True, exist_ok=True)

    # 複製訓練圖片並重新命名為：class_name_000.jpg、001.jpg...
    for i, img in enumerate(train_images):
        new_name = f"{class_folder.name}_{i:03d}.jpg"
        shutil.copy(img, class_dir / new_name)

    # 複製 unknown 圖片 → 命名為 00.jpg, 01.jpg, ..., 10.jpg, 11.jpg...
    for u_idx, img in enumerate(unknown_images):
        number = idx * 10 + u_idx
        new_name = f"{class_folder.name}_{number:02d}.jpg"
        shutil.copy(img, UNKNOWN_DIR / new_name)

    print(
        f"✔ {class_folder.name} → class {idx}: train= {len(train_images)}, unknown=1{len(unknown_images)}"
    )

print("✅ 全部圖片重新命名與分類完成！")

✔ bart_simpson → class 0: train= 40, unknown=110
✔ homer_simpson → class 1: train= 40, unknown=110
✔ lisa_simpson → class 2: train= 40, unknown=110
✔ marge_simpson → class 3: train= 40, unknown=110
✅ 全部圖片重新命名與分類完成！


In [1]:
# Show Samples in the Unknown Folder

import numpy as np
import cv2
import glob

def show_sample(img):
    # cv2.imshow 使用的 RGB 數值是 (0.0,1.0)
    img = img / 255.0
    # resize 影像至 (256,256) 方便觀看
    img256 = cv2.resize(img, (256, 256))
    # 顯示影像
    cv2.namedWindow('Sample')
    cv2.imshow('Sample', img256)
    # 按下 SPACEBAR 關閉
    while (True):
        c = cv2.waitKey(500)
        if (c == ord(' ')): 
            break
    cv2.destroyAllWindows()
    return

ilst = glob.glob('images/u/*.*')

for f in ilst:
    print(f)
    img = cv2.imread(f)
    show_sample(img)


images/u\0.jpg
images/u\00.jpg
images/u\1.jpg
images/u\11.jpg


<hr>
<h3>Load all training samples of cat (0) & dog (1)</h3>
<p>image_size = <strong style="color:red">64</strong></p>
<pre>
images: all images (64x64x3)
labels: all labels (2)
</pre>

In [2]:
# Load all training samples of cat (0) & dog (1)

import numpy as np
import cv2
import glob

# Normalized size of sample images
image_size = 64
# Class index (labels): 0 and 1; (0 for egret & 1 for flamingo)
class_num = 2
class_name = [ 0, 1 ]

# 計算樣本數
count = 0
for c in class_name:
    clst = glob.glob('images/%d/*.*' % c)
    for f in clst:
        count = count + 1
sample_n = count

images = np.zeros([sample_n,image_size,image_size,3])
labels = np.zeros([sample_n,class_num])

count = 0
for c in class_name:
    clst = glob.glob('images/%d/*.*' % c)
    for f in clst:
        # rint(f)
        img = cv2.imread(f)
        img = cv2.resize(img,(image_size,image_size))
        # show_sample(img)
        images[count,:,:,:] = img[:,:,:]
        labels[count,:] = np.zeros([class_num])
        labels[count,c] = 1
        count = count + 1

# print(images)
# print(labels)

print('Samples are ready. (n=%d)' % (sample_n))


Samples are ready. (n=20)


<hr>
<h3>Basic Parameters</h3>

In [3]:
# Basic Parameters

# 載入 modules

import torch
import torch.nn as nn

# pytorch reproducible
torch.manual_seed(1)

# 設定基本參數並載入 MNIST Dataset

# 基本參數

# 訓練回合數
EPOCH = 1000
# Learning Rate (LR)
LR = 0.001
# (RGB Channels)
CHANNEL = 3
# 影像尺寸 IMAGE_SIZE x IMAGE_SIZE
IMAGE_SIZE = image_size
# 總樣本數
N = sample_n


<hr>
<h3>CNN 定義</h3>
<pre>
<span style="color:red">CNN for 64x64 color images (RGB) (Cat vs. Dogs)</span>
<span style="color:red">4 convolutional layers</span>
</pre>

In [4]:
# CNN 定義

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        # Convolutional Layer 1
        self.conv1 = nn.Sequential(
            # input shape (3, 64, 64),  (IN_CHANNEL=3, IMAGE_SIZE=64, IMAGE_SIZE=64)
            nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5, stride=1, padding=2),
            # output shape (16, 64, 64), (OUT_CHANNEL=16, IMAGE_SIZE=64, IMAGE_SIZE=64)
            # activation
            nn.ReLU(),
            # size=2x2, output shape (16, 32, 32), (OUT_CHANNEL=16, IMAGE_SIZE/2=32, IMAGE_SIZE/2=32)
            nn.MaxPool2d(kernel_size=2)
        )
        # Convolutional Layer 2
        self.conv2 = nn.Sequential(
            # input shape (16, 32, 32), (IN_CHANNEL=16, IMAGE_SIZE/2=32, IMAGE_SIZE/2=32)
            nn.Conv2d(16, 32, 5, 1, 2),
            # output shape (32, 32, 32), (OUT_CHANNEL=32, IMAGE_SIZE/2=32, IMAGE_SIZE/2=32)
            # activation
            nn.ReLU(),
            # size=2x2, output shape (32, 16, 16), (OUT_CHANNEL=32, IMAGE_SIZE/4=16, IMAGE_SIZE/4=16)
            nn.MaxPool2d(2)
        )
        # Convolutional Layer 3
        self.conv3 = nn.Sequential(
            # input shape (32, 16, 16), (IN_CHANNEL=32, IMAGE_SIZE/4=16, IMAGE_SIZE/4=16)
            nn.Conv2d(32, 64, 5, 1, 2),
            # output shape (64, 16, 16), (OUT_CHANNEL=64, IMAGE_SIZE/4=16, IMAGE_SIZE/4=16)
            # activation
            nn.ReLU(),
            # size=2x2, output shape (64, 8, 8), (OUT_CHANNEL=64, IMAGE_SIZE/8=8, IMAGE_SIZE/8=8)
            nn.MaxPool2d(2)
        )
        # Convolutional Layer 4
        self.conv4 = nn.Sequential(
            # input shape (32, 16, 16), (IN_CHANNEL=64, IMAGE_SIZE/8=8, IMAGE_SIZE/8=8)
            nn.Conv2d(64, 128, 5, 1, 2),
            # output shape (64, 16, 16), (OUT_CHANNEL=128, IMAGE_SIZE/8=8, IMAGE_SIZE/8=8)
            # activation
            nn.ReLU(),
            # size=2x2, output shape (128, 4, 4), (OUT_CHANNEL=128, IMAGE_SIZE/16=4, IMAGE_SIZE/16=4)
            nn.MaxPool2d(2)
        )
        # Dropout，丟掉一些 weights，防止 overfitting
        self.drop_out = nn.Dropout()
        # Fully connected layer 1, output 1024 classes, (IN=128*4*4=1024, OUT=256)
        self.fc1 = nn.Sequential(
            nn.Linear(128 * int(IMAGE_SIZE/16) * int(IMAGE_SIZE/16), 256),
            nn.ReLU()
        )
        # Fully connected layer 2, output 16 classes, (IN=256, OUT=16)
        self.fc2 = nn.Sequential(
            nn.Linear(256, 16),
            nn.ReLU()
        )
        # Fully connected layer 3, output 2 classes, (IN=16, OUT=2)
        self.fc3 = nn.Sequential(
            nn.Linear(16, 2)
        )
    def forward(self, x):
        net = self.conv1(x)
        net = self.conv2(net)
        net = self.conv3(net)
        net = self.conv4(net)
        # CNN reshape, (batch_size, 128 * 4 * 4)
        # net = net.view(net.size(0), -1)
        net = net.contiguous().view(net.size(0), -1)
        net= self.drop_out(net)
        net= self.fc1(net)
        net= self.fc2(net)
        output = self.fc3(net)
        return output

# CNN 產生
cnn = CNN()

# net architecture
print(cnn)


CNN(
  (conv1): Sequential(
    (0): Conv2d(3, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv3): Sequential(
    (0): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv4): Sequential(
    (0): Conv2d(64, 128, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (drop_out): Dropout(p=0.5, inplace=False)
  (fc1): Sequential(
    (0): Linear(in_features=2048, out_features=256, bias=True)
    (1): ReLU()
  )
  (fc2): Sequential(
    (0): Linear(in

<hr>
<h3>改變影像資料的維度排列方式，配合 CNN 所需的維度</h3>

In [5]:
# 改變影像資料的維度排列方式，配合 CNN 所需的維度

# 原始影像集：個數 x 影像高度 x 影像寬度 x 色頻數
print(images.shape)

# 新影像集
images_for_cnn = images.copy()

print(images_for_cnn.shape)

# 維度轉換
# 維度二與維度三互換
images_for_cnn = np.swapaxes(images_for_cnn, 2, 3)
# 維度一與維度二互換
images_for_cnn = np.swapaxes(images_for_cnn, 1, 2)

# 新影像集：個數 x 色頻數 x 影像高度 x 影像寬度
print(images_for_cnn.shape)


(20, 64, 64, 3)
(20, 64, 64, 3)
(20, 3, 64, 64)


<hr>
<h3>影像集 ndarray 格式轉換為 Tensor 格式</h3>

In [6]:
# 影像集 ndarray 格式轉換為 Tensor 格式

x = torch.from_numpy(images_for_cnn).float()
y = torch.from_numpy(labels).float()


In [7]:
print(x.shape)
print(y.shape)
print(x[0])
print(y[0])

torch.Size([20, 3, 64, 64])
torch.Size([20, 2])
tensor([[[ 25.,  21.,  24.,  ..., 123., 120., 121.],
         [ 23.,  27.,  24.,  ..., 116., 114., 113.],
         [ 20.,  22.,  22.,  ..., 111., 105., 107.],
         ...,
         [ 89.,  87.,  92.,  ...,   6.,   1.,   3.],
         [ 90.,  92.,  89.,  ...,   3.,   3.,   3.],
         [ 86.,  83.,  83.,  ...,   6.,   0.,   2.]],

        [[ 31.,  28.,  29.,  ..., 152., 149., 151.],
         [ 30.,  30.,  32.,  ..., 149., 144., 140.],
         [ 30.,  29.,  29.,  ..., 141., 134., 133.],
         ...,
         [116., 119., 122.,  ...,  14.,   9.,   5.],
         [117., 119., 120.,  ...,  17.,  10.,   8.],
         [113., 114., 114.,  ...,  11.,   2.,   4.]],

        [[ 20.,  14.,  17.,  ..., 149., 146., 146.],
         [ 15.,  15.,  19.,  ..., 145., 141., 136.],
         [ 14.,  15.,  16.,  ..., 136., 131., 127.],
         ...,
         [111., 112., 117.,  ...,  31.,  26.,  16.],
         [107., 108., 113.,  ...,  35.,  25.,  22.],
     

<hr>
<h3>Move to GPU if CUDA Available</h3>

In [8]:
# Move to GPU if CUDA Available

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cnn.to(device)

x, y = x.to(device), y.to(device)


cpu


<hr>
<h3>CNN Model Training</h3>

In [9]:
# CNN model training

# cnn training
def cnn_training(cnn, test_x, test_y):
    # loss function changed to be MSE (from Cross Entropy)
    loss_func = torch.nn.MSELoss(reduction='sum')
    # optimize all cnn parameters
    optimizer = torch.optim.Adam(cnn.parameters(), lr=LR)
    # Training iteratively
    for epoch in range(EPOCH+1):
        # shuffle
        indices = torch.randperm(x.size()[0])
        new_x, new_y = x[indices], y[indices]
        # cnn output
        output = cnn(new_x)
        # cross entropy loss
        loss = loss_func(output, new_y)
        # display err
        if (epoch % 10 == 0):
            err = loss.item() / sample_n
            print('%5d, %18.12f' % (epoch, err))
        # clear gradients for this training step
        optimizer.zero_grad()
        # backpropagation, compute gradients
        loss.backward()
        # apply gradients
        optimizer.step()
    return cnn

cnn = cnn_training(cnn, x, y)

# Save torch model
torch.save(cnn.state_dict(), 'cnn_cat_dog.model')


    0,     2.624842643738
   10,     0.503200721741
   20,     0.504134988785
   30,     0.503284549713
   40,     0.315707349777
   50,     0.241398859024
   60,     0.076080358028
   70,     0.055956864357
   80,     0.023809155822
   90,     0.024634346366
  100,     0.017856195569
  110,     0.014034721255
  120,     0.013948349655
  130,     0.006369228661
  140,     0.007834349573
  150,     0.008325707167
  160,     0.006453289837
  170,     0.006038298458
  180,     0.005132657662
  190,     0.007028925419
  200,     0.005754394829
  210,     0.004183486477
  220,     0.005586883426
  230,     0.005070885271
  240,     0.004294383898
  250,     0.003390232101
  260,     0.002834146097
  270,     0.007123984396
  280,     0.005305147916
  290,     0.006417617947
  300,     0.005817922205
  310,     0.006844614446
  320,     0.005686497688
  330,     0.004706688598
  340,     0.002913056314
  350,     0.004784955457
  360,     0.003087066486
  370,     0.005389612168
  380,     0

<hr>
<h3>Load CNN Model of CD samples</h3>

In [10]:
# Load CNN Model of CD samples

def load_cnn_model():
    # Load torch model
    cnn.load_state_dict(torch.load('cnn_cat_dog.model', map_location='cpu'))
    cnn.eval()
    print('Load previous cnn model completely!')
    return
    
load_cnn_model()


Load previous cnn model completely!


<hr>
<h3>Test Unknown Samples</h3>

In [11]:
# Test Unknown Samples

def cnn_testing():
    ilst = glob.glob('images/u/*.*')
    n = len(ilst)
    res = []
    for f in ilst:
        # print(f)
        img = cv2.imread(f)
        img = cv2.resize(img,(image_size,image_size))
        img = np.swapaxes(img, 1, 2)
        img = np.swapaxes(img, 0, 1)
        x = torch.zeros(1, 3, image_size, image_size)
        x = x.to(device)
        x[0] = torch.from_numpy(img).float()
        dic = { 'Cat':0, 'Dog':0 }
        for i in range(5):
            output = cnn(x)
            olst = output.tolist()
            for o in olst:
                if (o[0] >= o[1]):
                    k = 'Cat'
                else:
                    k = 'Dog'
                dic[k] = dic[k] + 1
                # print(olst, k)
        if (dic['Cat'] >= dic['Dog']):
            res.append([f, 'Cat'])
        else:
            res.append([f, 'Dog'])
    return res

if __name__ == '__main__':
    res = cnn_testing()
    print(res)


[['images/u\\0.jpg', 'Cat'], ['images/u\\00.jpg', 'Cat'], ['images/u\\1.jpg', 'Dog'], ['images/u\\11.jpg', 'Dog']]
