In [54]:
import torch

# Report the CUDA environment: availability, number of visible GPUs, and the
# name of the current GPU ("無" when CUDA is not available).
cuda_available = torch.cuda.is_available()
gpu_count = torch.cuda.device_count()

if torch.cuda.is_available():
    current_gpu_name = torch.cuda.get_device_name(torch.cuda.current_device())
else:
    current_gpu_name = "無"

print("CUDA 是否可用:", cuda_available)
print("GPU 數量:", gpu_count)
print("目前使用的 GPU:", current_gpu_name)

CUDA 是否可用: True
GPU 數量: 1
目前使用的 GPU: NVIDIA GeForce RTX 3060 Ti


In [55]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [95]:
from pathlib import Path

ROOT_DIR = Path("simpson")  # dataset root: one sub-folder per class
# Same suffix set as the train/test split cell, so the counts printed here
# describe exactly the files that cell will copy.
IMAGE_EXTENSIONS = [".jpg", ".jpeg", ".png", ".bmp"]


def count_images_per_class(root_dir, extensions=IMAGE_EXTENSIONS):
    """Count the image files directly inside each class sub-folder.

    Args:
        root_dir: Directory whose immediate sub-directories are the classes.
        extensions: Iterable of file suffixes (with the leading dot) that
            count as images. Matching is case-insensitive.

    Returns:
        dict mapping sub-folder name -> number of matching regular files.

    Raises:
        FileNotFoundError: if ``root_dir`` is not an existing directory.
    """
    root_dir = Path(root_dir)
    if not root_dir.is_dir():
        raise FileNotFoundError(f"Dataset folder not found: {root_dir}")

    allowed = {ext.lower() for ext in extensions}
    return {
        class_folder.name: sum(
            1
            for file in class_folder.iterdir()
            # is_file() keeps a directory named e.g. "foo.jpg" out of the count
            if file.is_file() and file.suffix.lower() in allowed
        )
        for class_folder in root_dir.iterdir()
        if class_folder.is_dir()
    }


if ROOT_DIR.is_dir():
    class_counts = count_images_per_class(ROOT_DIR)
else:
    # Give a readable hint instead of a traceback when the dataset is missing.
    class_counts = {}
    print(f"Dataset folder not found: {ROOT_DIR}")

# Print the number of images in each class, ordered by class name
for class_name, count in sorted(class_counts.items()):
    print(f"{class_name}: {count}")

bart_simpson: 1342
homer_simpson: 2246
lisa_simpson: 1354
marge_simpson: 1291


In [34]:
import random
import shutil
from pathlib import Path

# Source folder (one sub-folder per class) and the two destination folders
SRC_DIR = Path("simpson")
TRAIN_DIR = Path("simpson_train")
TEST_DIR = Path("simpson_test_all")

# Number of images per class kept for training; the rest go to the test pool
train_images_per_class = 500
image_extensions = [".jpg", ".jpeg", ".png", ".bmp"]
# Fixed seed so that re-running this cell reproduces the same split instead of
# copying a second, different split on top of the first one (which would put
# the same image in both the train and the test folder).
SPLIT_SEED = 42


def split_dataset(
    src_dir,
    train_dir,
    test_dir,
    train_per_class=train_images_per_class,
    extensions=image_extensions,
    seed=SPLIT_SEED,
):
    """Copy each class's images into a train folder and a test folder.

    For every class sub-folder of ``src_dir`` the image files are listed in
    sorted order, shuffled with an RNG seeded from ``seed`` and the class name,
    and then split: the first ``train_per_class`` files are copied to
    ``train_dir/<class>`` and the remainder to ``test_dir/<class>``.
    Files already present in the destination folders are not removed.

    Args:
        src_dir: Directory containing one sub-directory per class.
        train_dir: Destination root for the training images.
        test_dir: Destination root for the remaining images.
        train_per_class: Maximum number of training images per class.
        extensions: File suffixes treated as images (case-insensitive).
        seed: Seed for the shuffle; the same seed and files give the same split.

    Returns:
        dict mapping class name -> (number of train images, number of test images).

    Raises:
        FileNotFoundError: if ``src_dir`` is not an existing directory.
    """
    src_dir, train_dir, test_dir = Path(src_dir), Path(train_dir), Path(test_dir)
    if not src_dir.is_dir():
        raise FileNotFoundError(f"Source folder not found: {src_dir}")

    allowed = {ext.lower() for ext in extensions}

    # Create the train and test root folders
    train_dir.mkdir(parents=True, exist_ok=True)
    test_dir.mkdir(parents=True, exist_ok=True)

    split_sizes = {}
    for class_folder in sorted(p for p in src_dir.iterdir() if p.is_dir()):
        # Sort first: iterdir() order is arbitrary, so a seeded shuffle alone
        # would not be reproducible.
        image_files = sorted(
            f
            for f in class_folder.iterdir()
            if f.is_file() and f.suffix.lower() in allowed
        )
        # Per-class RNG: one class's split does not depend on the others.
        random.Random(f"{seed}:{class_folder.name}").shuffle(image_files)

        train_images = image_files[:train_per_class]
        test_images = image_files[train_per_class:]

        # Create the per-class sub-folders
        train_class_dir = train_dir / class_folder.name
        test_class_dir = test_dir / class_folder.name
        train_class_dir.mkdir(parents=True, exist_ok=True)
        test_class_dir.mkdir(parents=True, exist_ok=True)

        # Copy the images
        for img in train_images:
            shutil.copy(img, train_class_dir / img.name)
        for img in test_images:
            shutil.copy(img, test_class_dir / img.name)

        print(
            f"✔ {class_folder.name}: Train={len(train_images)}, Test={len(test_images)}"
        )
        split_sizes[class_folder.name] = (len(train_images), len(test_images))

    return split_sizes


if SRC_DIR.is_dir():
    split_dataset(SRC_DIR, TRAIN_DIR, TEST_DIR)
    print("資料分割完成！")
else:
    # Give a readable hint instead of a traceback when the dataset is missing.
    print(f"Source folder not found: {SRC_DIR}")

✔ bart_simpson: Train=500, Test=842
✔ homer_simpson: Train=500, Test=1746
✔ lisa_simpson: Train=500, Test=854
✔ marge_simpson: Train=500, Test=791
資料分割完成！


In [35]:
import os
import shutil
import random

# Source folder (the full held-out pool, one sub-folder per character)
SRC_ROOT = "simpson_test_all"
# Destination folder for the sampled subset
DST_ROOT = "simpson_test"
# Number of images to pick per character
SELECTED_NUM = 100
# Fixed seed so that re-running this cell picks the same files instead of
# adding a second random draw to the destination folder.
SAMPLE_SEED = 42


def sample_test_subset(src_root, dst_root, num_per_class=SELECTED_NUM, seed=SAMPLE_SEED):
    """Copy a reproducible random subset of each class folder to ``dst_root``.

    Args:
        src_root: Directory containing one sub-directory per class.
        dst_root: Destination root; ``dst_root/<class>`` is created as needed.
        num_per_class: Number of files to pick per class. When a class has
            fewer files, all of them are taken and a warning is printed.
        seed: Seed for the draw; the same seed and files give the same subset.

    Returns:
        dict mapping class name -> number of files copied for that class.
    """
    # Make sure the destination root exists
    os.makedirs(dst_root, exist_ok=True)

    picked = {}
    # Walk the per-class sub-folders in a stable order
    for class_name in sorted(os.listdir(src_root)):
        class_path = os.path.join(src_root, class_name)
        if not os.path.isdir(class_path):
            continue

        dst_class_path = os.path.join(dst_root, class_name)
        os.makedirs(dst_class_path, exist_ok=True)

        # Only regular files can be copied with copyfile; sort for a
        # listing-order-independent draw.
        images = sorted(
            name
            for name in os.listdir(class_path)
            if os.path.isfile(os.path.join(class_path, name))
        )
        if len(images) < num_per_class:
            print(
                f"⚠ {class_name}: only {len(images)} files available, "
                f"requested {num_per_class}"
            )
        # random.sample raises ValueError when asked for more items than exist
        sample_size = min(num_per_class, len(images))
        selected = random.Random(f"{seed}:{class_name}").sample(images, sample_size)

        # Copy the selected images
        for img_name in selected:
            src_img = os.path.join(class_path, img_name)
            dst_img = os.path.join(dst_class_path, img_name)
            shutil.copyfile(src_img, dst_img)

        picked[class_name] = len(selected)

    return picked


if os.path.isdir(SRC_ROOT):
    sample_test_subset(SRC_ROOT, DST_ROOT)
    print(f"完成從每個角色各取 {SELECTED_NUM} 張圖，並建立 {DST_ROOT} 資料夾！")
else:
    # Give a readable hint instead of a traceback when the source is missing.
    print(f"Source folder not found: {SRC_ROOT}")

完成從每個角色各取 100 張圖，並建立 simpson_test 資料夾！


In [74]:
import torch
from torchvision import datasets, transforms

# Preprocessing applied to every image: resize to 224x224, convert to a tensor,
# then normalize each channel with mean 0.5 / std 0.5 (range [-1, 1]).
preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
]
transform = transforms.Compose(preprocessing_steps)

# Datasets: one sub-folder per class under each root
train_dataset = datasets.ImageFolder("./simpson_train", transform=transform)
test_dataset = datasets.ImageFolder("./simpson_test", transform=transform)

# Batch loaders: training batches are shuffled, test batches keep dataset order
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset, batch_size=32, shuffle=True
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset, batch_size=32, shuffle=False
)

In [58]:
# Fetch the first training sample; indexing the dataset yields an (image, label) pair.
image, label = train_dataset[0]

In [59]:
# Show the shape of the transformed image tensor.
image.size()  

torch.Size([3, 224, 224])

In [60]:
# Class names exposed by the training dataset; later cells use this list to
# size the model's output layer and to turn label indices back into names.
class_names = train_dataset.classes
class_names

['bart_simpson', 'homer_simpson', 'lisa_simpson', 'marge_simpson']

In [40]:
# # make CNN Model (earlier draft, fully commented out; the active CNN class is defined in a later cell)
# class CNN(nn.Module):
#     def __init__(self):
#         super(CNN, self).__init__()
#         self.conv1 = nn.Conv2d(in_channels=3, out_channels=12, kernel_size=5)
#         self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
#         self.conv2 = nn.Conv2d(in_channels=12, out_channels=24, kernel_size=5)

#         self.fc1 = nn.Linear(24 * 53 * 53, 120)  
#         self.fc2 = nn.Linear(120, 84)
#         self.fc3 = nn.Linear(84, len(class_names))

#     def forward(self, x):
#         x = self.pool(F.relu(self.conv1(x)))
#         x = self.pool(F.relu(self.conv2(x)))
#         x = torch.flatten(x, 1)
#         x = F.relu(self.fc1(x))
#         x = F.relu(self.fc2(x))
#         x = self.fc3(x)
#         return x

In [61]:
import torch
import torch.nn as nn
import torch.optim as optim

# Convolutional neural network for image classification
class CNN(nn.Module):
    """Three conv/ReLU/max-pool stages followed by a three-layer classifier.

    Each stage halves the spatial size, so the feature map fed to the
    classifier is ``input_size // 8`` on each side (28x28 for the default
    224x224 input).

    Args:
        num_classes: Number of output logits.
        input_size: Side length of the (square) input images, or a
            ``(height, width)`` pair. Defaults to 224, which reproduces the
            original fixed ``128 * 28 * 28`` classifier input.

    Raises:
        ValueError: if either input dimension is smaller than 8 pixels, since
            three 2x poolings would leave an empty feature map.
    """

    def __init__(self, num_classes, input_size=224):
        super().__init__()

        if isinstance(input_size, int):
            height = width = input_size
        else:
            height, width = input_size
        # Three MaxPool2d(2, 2) layers, each flooring: overall floor(n / 8).
        feat_h, feat_w = height // 8, width // 8
        if feat_h < 1 or feat_w < 1:
            raise ValueError(
                f"input_size must be at least 8 pixels per side, got {input_size!r}"
            )

        self.features = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1),  # convolution
            nn.ReLU(),  # activation
            nn.MaxPool2d(kernel_size=2, stride=2),  # pooling: 224 -> 112 for the default input
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # 112 -> 56
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # 56 -> 28
        )

        self.classifier = nn.Sequential(
            nn.Linear(128 * feat_h * feat_w, 512),
            nn.ReLU(),
            # Randomly zeroes activations during training to reduce overfitting
            nn.Dropout(0.5),
            nn.Linear(512, 128),
            nn.ReLU(),
            nn.Linear(128, num_classes),
        )

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        x = self.features(x)
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = self.classifier(x)
        return x

In [62]:
# Build the network with one output per class and move it to the selected device.
num_classes = len(class_names)
model = CNN(num_classes)
model = model.to(device)

# Cross-entropy loss for the classification objective
criterion = nn.CrossEntropyLoss()

# Adam optimizer over all model parameters
optimizer = optim.Adam(model.parameters(), lr=1e-3)

In [63]:
# Train the model
EPOCH = 100

for epoch in range(EPOCH):
    # Put the model back in training mode at the start of every epoch.
    # evaluate_model() switches it to eval mode, so re-running this cell after
    # an evaluation would otherwise train with Dropout disabled.
    model.train()

    running_loss = 0.0  # sum of the per-batch mean losses
    correct = 0  # correctly classified training samples this epoch
    total = 0  # training samples seen this epoch

    for images, labels in train_loader:
        # Move the batch to the same device as the model (GPU or CPU)
        images = images.to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous step
        optimizer.zero_grad()
        # Forward pass
        outputs = model(images)
        # Compute the loss
        loss = criterion(outputs, labels)
        # Backward pass
        loss.backward()
        # Update the parameters
        optimizer.step()

        running_loss += loss.item()

        # Training accuracy: the predicted class is the largest logit
        predicted = outputs.detach().argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    avg_loss = running_loss / len(train_loader)
    accuracy = 100 * correct / total
    # Show a 1-based epoch number so the last line reads [EPOCH/EPOCH]
    print(
        f"Epoch [{epoch + 1}/{EPOCH}], Loss: {avg_loss:.8f}, Accuracy: {accuracy:.2f}%"
    )

Epoch [0/100], Loss: 0.97870982, Accuracy: 55.36%
Epoch [1/100], Loss: 0.74817580, Accuracy: 65.22%
Epoch [2/100], Loss: 0.65498834, Accuracy: 70.84%
Epoch [3/100], Loss: 0.56350886, Accuracy: 75.65%
Epoch [4/100], Loss: 0.47327030, Accuracy: 79.56%
Epoch [5/100], Loss: 0.40451188, Accuracy: 83.27%
Epoch [6/100], Loss: 0.28047490, Accuracy: 89.12%
Epoch [7/100], Loss: 0.18749736, Accuracy: 93.31%
Epoch [8/100], Loss: 0.13276261, Accuracy: 95.55%
Epoch [9/100], Loss: 0.09267966, Accuracy: 96.83%
Epoch [10/100], Loss: 0.06393274, Accuracy: 97.85%
Epoch [11/100], Loss: 0.08275156, Accuracy: 97.16%
Epoch [12/100], Loss: 0.05934071, Accuracy: 98.03%
Epoch [13/100], Loss: 0.05858211, Accuracy: 97.94%
Epoch [14/100], Loss: 0.06957361, Accuracy: 97.82%
Epoch [15/100], Loss: 0.02732759, Accuracy: 99.16%
Epoch [16/100], Loss: 0.01556781, Accuracy: 99.37%
Epoch [17/100], Loss: 0.03191702, Accuracy: 98.92%
Epoch [18/100], Loss: 0.03478150, Accuracy: 99.07%
Epoch [19/100], Loss: 0.02085074, Accurac

In [89]:
# Persist only the model's state dict (its parameters and buffers) to disk.
trained_weights = model.state_dict()
torch.save(trained_weights, "mlp4.model")

In [90]:
# Re-create the network with the same architecture as the one that was saved
model = CNN(len(class_names))

# Restore the saved weights, mapping tensors onto the active device.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source.
saved_weights = torch.load("mlp4.model", map_location=device)
model.load_state_dict(saved_weights)
model.to(device)
print("Load previous mlp model completely!")

Load previous mlp model completely!


In [91]:
def evaluate_model(model, test_loader, class_names, device, verbose=True):
    """Compute classification accuracy of ``model`` over ``test_loader``.

    The model is switched to eval mode (and left in it) and evaluated without
    gradient tracking.

    Args:
        model: Module returning one row of class scores per input sample.
        test_loader: Iterable yielding ``(images, labels)`` batches.
        class_names: Sequence mapping a class index to its display name.
        device: Device the batches are moved to before the forward pass.
        verbose: When True (default), print one colored line per sample with
            the predicted and true class names. Pass False for large loaders.

    Returns:
        Accuracy in percent (0-100) as a float; ``0.0`` when the loader
        yields no samples.
    """
    correct = 0
    total = 0
    model.eval()

    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)

            # Predicted class = index of the largest score in each row
            predicted = model(images).argmax(dim=1)

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            if not verbose:
                continue

            # One transfer per batch instead of one .item() call per element
            for pred_idx, true_idx in zip(predicted.tolist(), labels.tolist()):
                pred_label = class_names[pred_idx]
                true_label = class_names[true_idx]

                if pred_idx == true_idx:
                    print(
                        f"\033[92m[✅ 正確]\033[0m Prediction: {pred_label}, Ground Truth: {true_label}"
                    )
                else:
                    print(
                        f"\033[91m[❌ 錯誤]\033[0m Prediction: {pred_label}, Ground Truth: {true_label}"
                    )

    if total == 0:
        # Avoid ZeroDivisionError on an empty loader
        print("\nNo samples to evaluate.")
        return 0.0

    accuracy = 100 * correct / total
    print(f"\nAccuracy: {accuracy:.2f}%")
    return accuracy

In [92]:
# Dataset over the full held-out folder, using the same preprocessing as training
simpson_test_all_dataset = datasets.ImageFolder(
    "./simpson_test_all", transform=transform
)
# Batch loader; order is kept fixed (no shuffling) for evaluation
simpson_test_all_loader = torch.utils.data.DataLoader(
    dataset=simpson_test_all_dataset, batch_size=32, shuffle=False
)

In [93]:
# Evaluate on the sampled test subset
print("Evaluate model (test_loader)")
accuracy = evaluate_model(
    model=model, test_loader=test_loader, class_names=class_names, device=device
)

Evaluate model (test_loader)
[92m[✅ 正確][0m Prediction: bart_simpson, Ground Truth: bart_simpson
[92m[✅ 正確][0m Prediction: bart_simpson, Ground Truth: bart_simpson
[92m[✅ 正確][0m Prediction: bart_simpson, Ground Truth: bart_simpson
[92m[✅ 正確][0m Prediction: bart_simpson, Ground Truth: bart_simpson
[92m[✅ 正確][0m Prediction: bart_simpson, Ground Truth: bart_simpson
[92m[✅ 正確][0m Prediction: bart_simpson, Ground Truth: bart_simpson
[92m[✅ 正確][0m Prediction: bart_simpson, Ground Truth: bart_simpson
[92m[✅ 正確][0m Prediction: bart_simpson, Ground Truth: bart_simpson
[92m[✅ 正確][0m Prediction: bart_simpson, Ground Truth: bart_simpson
[92m[✅ 正確][0m Prediction: bart_simpson, Ground Truth: bart_simpson
[92m[✅ 正確][0m Prediction: bart_simpson, Ground Truth: bart_simpson
[92m[✅ 正確][0m Prediction: bart_simpson, Ground Truth: bart_simpson
[92m[✅ 正確][0m Prediction: bart_simpson, Ground Truth: bart_simpson
[92m[✅ 正確][0m Prediction: bart_simpson, Ground Truth: bart_simpson
[92m

In [88]:
# Evaluate on the full held-out pool
print("Evaluate model (simpson_test_all_loader)")
accuracy = evaluate_model(
    model=model,
    test_loader=simpson_test_all_loader,
    class_names=class_names,
    device=device,
)

Evaluate model (simpson_test_all_loader)
[92m[✅ 正確][0m Prediction: bart_simpson, Ground Truth: bart_simpson
[92m[✅ 正確][0m Prediction: bart_simpson, Ground Truth: bart_simpson
[92m[✅ 正確][0m Prediction: bart_simpson, Ground Truth: bart_simpson
[92m[✅ 正確][0m Prediction: bart_simpson, Ground Truth: bart_simpson
[92m[✅ 正確][0m Prediction: bart_simpson, Ground Truth: bart_simpson
[92m[✅ 正確][0m Prediction: bart_simpson, Ground Truth: bart_simpson
[91m[❌ 錯誤][0m Prediction: homer_simpson, Ground Truth: bart_simpson
[92m[✅ 正確][0m Prediction: bart_simpson, Ground Truth: bart_simpson
[92m[✅ 正確][0m Prediction: bart_simpson, Ground Truth: bart_simpson
[92m[✅ 正確][0m Prediction: bart_simpson, Ground Truth: bart_simpson
[92m[✅ 正確][0m Prediction: bart_simpson, Ground Truth: bart_simpson
[92m[✅ 正確][0m Prediction: bart_simpson, Ground Truth: bart_simpson
[92m[✅ 正確][0m Prediction: bart_simpson, Ground Truth: bart_simpson
[92m[✅ 正確][0m Prediction: bart_simpson, Ground Truth: bart_