## 新的前處理：70/15/15 切分，Train 同時包含原圖與增強後的圖（資料量變成兩倍）

In [None]:
import os
import random
from PIL import Image
from torch.utils.data import Dataset, DataLoader, ConcatDataset
from torchvision import transforms

# ImageNet channel statistics, used by every pipeline below: (mean, std)
imagenet_stats = [(0.485, 0.456, 0.406), (0.229, 0.224, 0.225)]

# === Preprocessing pipelines ===
# Training pipeline: random augmentations followed by tensor conversion and normalization.
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),  # crop a random region of the image and resize it to 224x224
    transforms.RandomHorizontalFlip(),  # mirror the image left/right (default probability 0.5)
    transforms.RandomAffine(            # random affine transform: rotation, translation, scaling, shear
        degrees=15,                     # rotation angle drawn from [-15, +15] degrees
        translate=(0.1, 0.1),           # horizontal / vertical shift of at most 10% of the image size
        scale=(0.9, 1.1),               # scale factor drawn from [0.9, 1.1]
        shear=10                        # shear angle drawn from [-10, +10] degrees
    ),
    transforms.ColorJitter(             # random color perturbation to imitate different lighting
        brightness=0.2,                 # brightness factor varies by up to +/-20%
        contrast=0.2,                   # contrast factor varies by up to +/-20%
        saturation=0.2,                 # saturation factor varies by up to +/-20%
        hue=0.1                         # hue shifted by up to +/-0.1
    ),
    transforms.GaussianBlur(            # Gaussian blur to imitate lens / motion blur
        kernel_size=3,                  # 3x3 blur kernel
        sigma=(0.1, 2.0)                # standard deviation drawn from [0.1, 2.0]
    ),
    transforms.ToTensor(),              # PIL image -> torch tensor, pixel values mapped from [0, 255] to [0.0, 1.0]
    transforms.Normalize(               # per-channel normalization with the ImageNet statistics
        imagenet_stats[0],              # mean = (0.485, 0.456, 0.406)
        imagenet_stats[1]               # std  = (0.229, 0.224, 0.225)
    )
])

# Evaluation pipeline: deterministic resize + center crop, then the same normalization.
validation_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(imagenet_stats[0], imagenet_stats[1])
])

# === 自訂 Dataset 類別 ===
class ImagePathDataset(Dataset):
    """Dataset backed by an explicit list of ``(image_path, label)`` pairs.

    Each item is loaded from disk on access, forced to RGB, and passed
    through ``transform`` before being returned together with its label.
    """

    def __init__(self, samples, transform):
        # ``samples`` is kept by reference (no copy): list of (img_path, label).
        self.transform = transform
        self.samples = samples

    def __len__(self):
        """Return the number of ``(path, label)`` pairs."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(transformed_image, label)``."""
        img_path, target = self.samples[idx]
        # Force three channels so grayscale / RGBA files match the RGB normalization.
        rgb_image = Image.open(img_path).convert("RGB")
        transformed = self.transform(rgb_image)
        return transformed, target

# === 主函式：從 rename_data 建立資料集與 dataloader ===
def build_loaders_from_rename_data(rename_root, batch_size=32, seed=42):
    """Build train/validation/test DataLoaders from a ``rename_root/<class>/`` tree.

    Every sub-directory of ``rename_root`` is one class. Within each class the
    image files (.jpg / .jpeg / .png) are shuffled and split 70% / 15% / 15%
    into train / validation / test; the test split receives the remainder.
    The training set contains every training image twice: once through
    ``train_transform`` (augmented) and once through ``validation_transform``
    (plain), so its length is twice the number of training images.

    Args:
        rename_root: Directory whose sub-directories are the classes.
        batch_size: Batch size used for all three loaders.
        seed: Seed for Python's global ``random`` module, so the split is
            reproducible for the same directory contents.

    Returns:
        ``(train_loader, val_loader, test_loader, class_to_idx)`` where
        ``class_to_idx`` maps each class directory name to its integer label.
    """
    random.seed(seed)

    # Keep directories only, and do so BEFORE numbering them. Enumerating the
    # raw listing would let a stray file (e.g. a log or .DS_Store) consume an
    # index, leaving gaps in the labels and possibly producing a label that is
    # >= the number of classes.
    class_names = [
        name for name in sorted(os.listdir(rename_root))
        if os.path.isdir(os.path.join(rename_root, name))
    ]
    class_to_idx = {name: idx for idx, name in enumerate(class_names)}

    train_samples = []
    val_samples = []
    test_samples = []

    for node_name, class_idx in class_to_idx.items():
        node_path = os.path.join(rename_root, node_name)

        # Sort before shuffling so the split does not depend on listdir order.
        all_images = [
            os.path.join(node_path, fname)
            for fname in sorted(os.listdir(node_path))
            if fname.lower().endswith(('.jpg', '.jpeg', '.png'))
        ]

        random.shuffle(all_images)
        total = len(all_images)
        n_train = int(total * 0.7)
        n_val = int(total * 0.15)

        train_imgs = all_images[:n_train]
        val_imgs = all_images[n_train:n_train + n_val]
        test_imgs = all_images[n_train + n_val:]  # remainder, absorbs rounding

        train_samples.extend((p, class_idx) for p in train_imgs)
        val_samples.extend((p, class_idx) for p in val_imgs)
        test_samples.extend((p, class_idx) for p in test_imgs)

    # The same training paths are wrapped twice with different transforms.
    train_aug_set = ImagePathDataset(train_samples, transform=train_transform)
    train_plain_set = ImagePathDataset(train_samples, transform=validation_transform)
    val_set = ImagePathDataset(val_samples, transform=validation_transform)
    test_set = ImagePathDataset(test_samples, transform=validation_transform)

    # Augmented + plain copies concatenated: 2x the training images.
    full_train_set = ConcatDataset([train_aug_set, train_plain_set])

    train_loader = DataLoader(full_train_set, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

    return train_loader, val_loader, test_loader, class_to_idx


In [None]:
# Root folder holding one sub-directory per class (node)
rename_root = "/content/drive/MyDrive/DLA_term_project_data/rename_data"

# Build the three loaders plus the class-name -> label mapping
train_loader, val_loader, test_loader, class_map = build_loaders_from_rename_data(rename_root)

# Report the label mapping and the size of each split
print("類別對應:", class_map)
print(f"訓練集: {len(train_loader.dataset)}, 驗證集: {len(val_loader.dataset)}, 測試集: {len(test_loader.dataset)}")


MyDrive


FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/DLA_term_project_data/rename_data'

## rename data 處理

In [None]:
import os
import shutil
from tqdm import tqdm

# Copies every image from all_data/<person>/<node>/{sea,up}/ into
# rename_data/<node>/ under the name "<person code><s|u>_<n><ext>".
# NOTE: not idempotent -- numbering continues after the files already present,
# so running this cell twice copies every image a second time.

# One-letter code per person, used as the file-name prefix
person_code = {
    'Barry': 'A',
    'Billy': 'B',
    'Chan': 'C',
    'Edwin': 'D'
}

# Accepted extensions. Names are lower-cased before matching, so the
# upper-case entries never match on their own; they are kept unchanged.
valid_extensions = ('.jpg', '.jpeg', '.png', '.heic', '.JPG', '.HEIC')

# Source / destination roots on the mounted Drive (Colab)
source_root = "/content/drive/MyDrive/DLA_term_project_data/all_data"
target_root = "/content/drive/MyDrive/DLA_term_project_data/rename_data"

# Create rename_data/node_1 ... rename_data/node_22
os.makedirs(target_root, exist_ok=True)
for i in range(1, 23):
    os.makedirs(os.path.join(target_root, f"node_{i}"), exist_ok=True)

# Walk over every known person
for person in sorted(os.listdir(source_root)):
    person_path = os.path.join(source_root, person)
    if not os.path.isdir(person_path) or person not in person_code:
        continue

    person_prefix = person_code[person]

    # Walk over every node folder of that person
    for node_name in sorted(os.listdir(person_path)):
        node_path = os.path.join(person_path, node_name)
        if not os.path.isdir(node_path):
            continue

        # Destination folder. Created on demand so that a source folder named
        # outside node_1..node_22 does not crash the os.listdir() call below.
        target_node_dir = os.path.join(target_root, node_name)
        os.makedirs(target_node_dir, exist_ok=True)

        for category in ["sea", "up"]:
            category_path = os.path.join(node_path, category)
            if not os.path.isdir(category_path):
                continue

            type_prefix = 's' if category == "sea" else 'u'
            full_prefix = f"{person_prefix}{type_prefix}"

            # Continue numbering after the files that already carry this prefix
            existing = [
                f for f in os.listdir(target_node_dir)
                if f.startswith(full_prefix) and f.lower().endswith(valid_extensions)
            ]
            start_index = len(existing) + 1

            images = sorted([
                f for f in os.listdir(category_path)
                if f.lower().endswith(valid_extensions)
            ])

            for idx, filename in enumerate(images, start=start_index):
                ext = os.path.splitext(filename)[1]  # keep the original extension, including its case
                new_name = f"{full_prefix}_{idx}{ext}"
                src = os.path.join(category_path, filename)
                dst = os.path.join(target_node_dir, new_name)
                shutil.copy2(src, dst)

            tqdm.write(f"✅ Copied {len(images)} from {person}/{node_name}/{category} to {target_node_dir}")


✅ Copied 8 from Barry/node_1/sea to /content/drive/MyDrive/DLA_term_project_data/rename_data/node_1
✅ Copied 8 from Barry/node_1/up to /content/drive/MyDrive/DLA_term_project_data/rename_data/node_1
✅ Copied 8 from Barry/node_10/sea to /content/drive/MyDrive/DLA_term_project_data/rename_data/node_10
✅ Copied 8 from Barry/node_10/up to /content/drive/MyDrive/DLA_term_project_data/rename_data/node_10
✅ Copied 8 from Barry/node_11/sea to /content/drive/MyDrive/DLA_term_project_data/rename_data/node_11
✅ Copied 8 from Barry/node_11/up to /content/drive/MyDrive/DLA_term_project_data/rename_data/node_11
✅ Copied 9 from Barry/node_12/sea to /content/drive/MyDrive/DLA_term_project_data/rename_data/node_12
✅ Copied 9 from Barry/node_12/up to /content/drive/MyDrive/DLA_term_project_data/rename_data/node_12
✅ Copied 8 from Barry/node_13/sea to /content/drive/MyDrive/DLA_term_project_data/rename_data/node_13
✅ Copied 8 from Barry/node_13/up to /content/drive/MyDrive/DLA_term_project_data/rename_da

In [None]:
!pip install pyheif pillow --quiet

import os
import pyheif
from PIL import Image

def convert_all_heic_in_rename_data(root_folder="/content/drive/MyDrive/DLA_term_project_data/rename_data"):
    """Convert every HEIC file in ``root_folder/<node>/`` to JPEG, in place.

    For each ``*.heic`` file (extension matched case-insensitively) a ``.jpg``
    with the same stem is written next to it and the HEIC original is then
    deleted. Files that cannot be converted are left untouched, reported on
    stdout, and their paths are written to ``failed_rename.txt`` in the
    current working directory.

    Args:
        root_folder: Directory whose immediate sub-directories hold the images.
            Non-directory entries in ``root_folder`` are ignored.

    Returns:
        None.
    """
    failed_files = []

    for node_folder in os.listdir(root_folder):
        node_path = os.path.join(root_folder, node_folder)
        if not os.path.isdir(node_path):
            continue

        for file in os.listdir(node_path):
            if not file.lower().endswith(".heic"):
                continue

            heic_path = os.path.join(node_path, file)
            jpg_path = os.path.splitext(heic_path)[0] + ".jpg"

            # Best effort: one unreadable file must not abort the whole batch,
            # so any failure is recorded and the loop carries on.
            try:
                heif_file = pyheif.read(heic_path)
                image = Image.frombytes(
                    heif_file.mode,
                    heif_file.size,
                    heif_file.data,
                    "raw",
                    heif_file.mode,
                    heif_file.stride,
                )
                # JPEG has no alpha channel; saving e.g. an RGBA image raises.
                if image.mode != "RGB":
                    image = image.convert("RGB")
                image.save(jpg_path, "JPEG")
                print(f"✅ Converted: {heic_path}")
                # The HEIC original is deleted only after the JPEG was written.
                os.remove(heic_path)
            except Exception as e:
                print(f"❌ Failed: {heic_path} — {str(e)}")
                failed_files.append(heic_path)

    if failed_files:
        print("\n⚠️ Failed to convert the following files:")
        for failed_path in failed_files:
            print(f"- {failed_path}")
        with open("failed_rename.txt", "w", encoding="utf-8") as log_file:
            for path in failed_files:
                log_file.write(path + "\n")

# Run the conversion on the rename_data folder
convert_all_heic_in_rename_data("/content/drive/MyDrive/DLA_term_project_data/rename_data")


✅ Converted: /content/drive/MyDrive/DLA_term_project_data/rename_data/node_1/As_1.HEIC
✅ Converted: /content/drive/MyDrive/DLA_term_project_data/rename_data/node_1/As_2.HEIC
✅ Converted: /content/drive/MyDrive/DLA_term_project_data/rename_data/node_1/As_3.HEIC
✅ Converted: /content/drive/MyDrive/DLA_term_project_data/rename_data/node_1/As_4.HEIC
✅ Converted: /content/drive/MyDrive/DLA_term_project_data/rename_data/node_1/As_5.HEIC
✅ Converted: /content/drive/MyDrive/DLA_term_project_data/rename_data/node_1/As_6.HEIC
✅ Converted: /content/drive/MyDrive/DLA_term_project_data/rename_data/node_1/As_7.HEIC
✅ Converted: /content/drive/MyDrive/DLA_term_project_data/rename_data/node_1/As_8.HEIC
✅ Converted: /content/drive/MyDrive/DLA_term_project_data/rename_data/node_1/Au_1.HEIC
✅ Converted: /content/drive/MyDrive/DLA_term_project_data/rename_data/node_1/Au_2.HEIC
✅ Converted: /content/drive/MyDrive/DLA_term_project_data/rename_data/node_1/Au_3.HEIC
✅ Converted: /content/drive/MyDrive/DLA_ter

In [None]:
import os

def rename_all_jpg_to_lowercase(folder_path):
    """Rename every ``*.JPG`` file below ``folder_path`` to ``*.jpg``.

    The tree is walked recursively. Only names ending in exactly ``.JPG`` are
    touched. A file is skipped (with a message) when the lower-case target
    name already exists, so nothing is overwritten.
    """
    upper_ext = ".JPG"
    for root, _, files in os.walk(folder_path):
        for file in files:
            if not file.endswith(upper_ext):
                continue

            new_filename = file[:-len(upper_ext)] + ".jpg"
            source = os.path.join(root, file)
            target = os.path.join(root, new_filename)

            if not os.path.exists(target):
                os.rename(source, target)
                print(f"🔄 Renamed: {file} → {new_filename}")
            else:
                # Never clobber a file that already has the lower-case name.
                print(f"⚠️ Skipped (already exists): {new_filename}")

# Example run (on the rename_data folder)
rename_all_jpg_to_lowercase("/content/drive/MyDrive/DLA_term_project_data/rename_data")


🔄 Renamed: Ds_1.JPG → Ds_1.jpg
🔄 Renamed: Ds_2.JPG → Ds_2.jpg
🔄 Renamed: Ds_3.JPG → Ds_3.jpg
🔄 Renamed: Ds_4.JPG → Ds_4.jpg
🔄 Renamed: Ds_5.JPG → Ds_5.jpg
🔄 Renamed: Ds_6.JPG → Ds_6.jpg
🔄 Renamed: Ds_7.JPG → Ds_7.jpg
🔄 Renamed: Ds_8.JPG → Ds_8.jpg
🔄 Renamed: Du_1.JPG → Du_1.jpg
🔄 Renamed: Du_2.JPG → Du_2.jpg
🔄 Renamed: Du_3.JPG → Du_3.jpg
🔄 Renamed: Du_4.JPG → Du_4.jpg
🔄 Renamed: Du_5.JPG → Du_5.jpg
🔄 Renamed: Du_6.JPG → Du_6.jpg
🔄 Renamed: Ds_1.JPG → Ds_1.jpg
🔄 Renamed: Ds_2.JPG → Ds_2.jpg
🔄 Renamed: Ds_3.JPG → Ds_3.jpg
🔄 Renamed: Ds_4.JPG → Ds_4.jpg
🔄 Renamed: Ds_5.JPG → Ds_5.jpg
🔄 Renamed: Ds_6.JPG → Ds_6.jpg
🔄 Renamed: Ds_7.JPG → Ds_7.jpg
🔄 Renamed: Ds_8.JPG → Ds_8.jpg
🔄 Renamed: Du_1.JPG → Du_1.jpg
🔄 Renamed: Du_2.JPG → Du_2.jpg
🔄 Renamed: Du_3.JPG → Du_3.jpg
🔄 Renamed: Du_4.JPG → Du_4.jpg
🔄 Renamed: Du_5.JPG → Du_5.jpg
🔄 Renamed: Du_6.JPG → Du_6.jpg
🔄 Renamed: Du_7.JPG → Du_7.jpg
🔄 Renamed: Du_8.JPG → Du_8.jpg
🔄 Renamed: Ds_1.JPG → Ds_1.jpg
🔄 Renamed: Ds_2.JPG → Ds_2.jpg
🔄 Rename

我前處理都弄好了，我把每個人的每個node裡面的sea,up都各取一張當test, validation(一個node會有八張test,八張validation)，然後剩下的就都在training

檔案都放在classified_data裡面

我把格式都弄成.jpg比較好讀

我的preprocess裡面會有兩大部分：
第一部分的前處理裡面的第一個cell要先跑才能用colab讀資料，第二個cell是前處理的內容，我就把文件寫的都弄一弄，然後有用dataloader把資料讀進來，第三個cell是我跑了一個很簡單的模型，有跑起來，所以我前面的處理應該都沒問題了。
第二部分的處理檔案你們應該不用管。

處理檔案的時候發現，以下三張有問題，所以就刪掉了
edwin node1 3942 3944, barry node9 5488

## 前處理

##### 要先跑這個，然後選同意，才能用colab讀資料

In [6]:
from google.colab import drive
# Mount Google Drive at /content/drive; force_remount=True is passed so the
# mount call is issued again even when this cell is re-run.
drive.mount('/content/drive', force_remount=True)


Mounted at /content/drive


##### 前處理的內容，你們主要應該是用這段，把dataloader載進來

In [None]:
import os
import torch
import torch.nn as nn
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
from torch.optim import Adam
from tqdm import tqdm

# ImageNet channel statistics: (mean, std)
imagenet_stats = [(0.485, 0.456, 0.406), (0.229, 0.224, 0.225)]

# Training pipeline: random augmentations followed by tensor conversion and normalization.
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),  # crop a random region of the image and resize it to 224x224
    transforms.RandomHorizontalFlip(),  # mirror the image left/right (default probability 0.5)
    transforms.RandomAffine(            # random affine transform: rotation, translation, scaling, shear
        degrees=15,                     # rotation angle drawn from [-15, +15] degrees
        translate=(0.1, 0.1),           # horizontal / vertical shift of at most 10% of the image size
        scale=(0.9, 1.1),               # scale factor drawn from [0.9, 1.1]
        shear=10                        # shear angle drawn from [-10, +10] degrees
    ),
    transforms.ColorJitter(             # random color perturbation to imitate different lighting
        brightness=0.2,                 # brightness factor varies by up to +/-20%
        contrast=0.2,                   # contrast factor varies by up to +/-20%
        saturation=0.2,                 # saturation factor varies by up to +/-20%
        hue=0.1                         # hue shifted by up to +/-0.1
    ),
    transforms.GaussianBlur(            # Gaussian blur to imitate lens / motion blur
        kernel_size=3,                  # 3x3 blur kernel
        sigma=(0.1, 2.0)                # standard deviation drawn from [0.1, 2.0]
    ),
    transforms.ToTensor(),              # PIL image -> torch tensor, pixel values mapped from [0, 255] to [0.0, 1.0]
    transforms.Normalize(               # per-channel normalization with the ImageNet statistics
        imagenet_stats[0],              # mean = (0.485, 0.456, 0.406)
        imagenet_stats[1]               # std  = (0.229, 0.224, 0.225)
    )
])


# Validation pipeline: deterministic resize + center crop, same normalization.
validation_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(imagenet_stats[0], imagenet_stats[1])
])

# Test pipeline: same steps as the validation pipeline.
test_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(imagenet_stats[0], imagenet_stats[1])
])

# === Data location ===
base_path = "/content/drive/MyDrive/DLA_term_project_data/classified_data"

# === One ImageFolder per split, each with its own transform ===
train_data = datasets.ImageFolder(root=os.path.join(base_path, "train"), transform=train_transform)
val_data = datasets.ImageFolder(root=os.path.join(base_path, "validation"), transform=validation_transform)
test_data = datasets.ImageFolder(root=os.path.join(base_path, "test"), transform=test_transform)

# Only the training loader shuffles; validation and test keep a fixed order.
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
val_loader = DataLoader(val_data, batch_size=32, shuffle=False)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

# Show the class-name -> label mapping
print("Class to index mapping:", train_data.class_to_idx)

Class to index mapping: {'node_1': 0, 'node_10': 1, 'node_11': 2, 'node_12': 3, 'node_13': 4, 'node_14': 5, 'node_15': 6, 'node_16': 7, 'node_17': 8, 'node_18': 9, 'node_19': 10, 'node_2': 11, 'node_20': 12, 'node_21': 13, 'node_22': 14, 'node_3': 15, 'node_4': 16, 'node_5': 17, 'node_6': 18, 'node_7': 19, 'node_8': 20, 'node_9': 21}


##### 這邊是我隨便跑了很簡單的resnet然後3個epoch，有跑起來，所以前面的應該是沒問題

In [None]:
# Pick the device: GPU when available, otherwise CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load a pretrained ResNet18; its feature layers are frozen below.
# NOTE(review): newer torchvision releases replace `pretrained=` with
# `weights=` -- confirm against the installed version.
model = models.resnet18(pretrained=True)

# Freeze every parameter (no gradients are computed for them)
for param in model.parameters():
    param.requires_grad = False

# Replace the final layer with a 22-output classifier; this layer is trained
model.fc = nn.Linear(model.fc.in_features, 22)
for param in model.fc.parameters():
    param.requires_grad = True

model = model.to(device)

# Loss and optimizer (only the trainable model.fc parameters are passed in)
criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.fc.parameters(), lr=1e-4)  # optimizes the fc layer only

# 訓練函數
def train(model, loader, optimizer, criterion):
    """Run one training epoch over ``loader``.

    Batches are moved to the module-level ``device`` before the forward pass.

    Args:
        model: Network to train; switched to training mode.
        loader: Iterable yielding ``(inputs, labels)`` batches.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss function called as ``criterion(outputs, labels)``.

    Returns:
        ``(mean_loss, accuracy)`` over all samples seen, with the loss weighted
        by batch size. ``(0.0, 0.0)`` when ``loader`` yields no samples.
    """
    # NOTE(review): model.train() applies to the whole model, including layers
    # whose weights are frozen -- confirm that is intended for any
    # BatchNorm / Dropout layers in the backbone.
    model.train()
    running_loss = 0
    correct = 0
    total = 0
    for inputs, labels in tqdm(loader, desc="Training"):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # loss.item() is the batch mean; scale by batch size so that the
        # final division yields a per-sample mean.
        running_loss += loss.item() * inputs.size(0)
        _, preds = torch.max(outputs, 1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

    # An empty loader would otherwise raise ZeroDivisionError.
    if total == 0:
        return 0.0, 0.0
    return running_loss / total, correct / total

# 驗證函數
def evaluate(model, loader):
    """Return the classification accuracy of ``model`` on ``loader``.

    The model is switched to eval mode and gradients are disabled. Batches are
    moved to the module-level ``device`` before the forward pass.

    Args:
        model: Network to evaluate.
        loader: Iterable yielding ``(inputs, labels)`` batches.

    Returns:
        Fraction of correctly classified samples, or ``0.0`` when ``loader``
        yields no samples.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            # Predicted class = index of the largest output along dim 1.
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    # An empty loader would otherwise raise ZeroDivisionError.
    if total == 0:
        return 0.0
    return correct / total

# Train for a few epochs (3 is only a quick smoke test of the pipeline)
EPOCHS = 3
for epoch in range(EPOCHS):
    train_loss, train_acc = train(model, train_loader, optimizer, criterion)
    val_acc = evaluate(model, val_loader)
    print(f"[Epoch {epoch+1}] Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}")

# Final accuracy on the held-out test split
test_acc = evaluate(model, test_loader)
print(f"\n✅ Final Test Accuracy: {test_acc:.4f}")


Training: 100%|██████████| 36/36 [13:11<00:00, 21.99s/it]


[Epoch 1] Train Loss: 3.2020, Train Acc: 0.0540, Val Acc: 0.0795


Training: 100%|██████████| 36/36 [03:14<00:00,  5.39s/it]


[Epoch 2] Train Loss: 3.1164, Train Acc: 0.0557, Val Acc: 0.0795


Training: 100%|██████████| 36/36 [03:14<00:00,  5.41s/it]


[Epoch 3] Train Loss: 3.0707, Train Acc: 0.0809, Val Acc: 0.0795

✅ Final Test Accuracy: 0.1023


## 處理檔案，全部轉成jpg並且抽樣的分類到test train validation。以下的code你們都不用跑，我只是把它留下來而已


In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive so the data folders are reachable
drive.mount('/content/drive')


Mounted at /content/drive


### 把四個人的資料分別在每個node/sea,up裡面都取一張test、一張validation，然後剩下的丟到train

In [None]:
import os
import random
import shutil
from google.colab import drive

# Splits the raw photos into classified_data/{train,validation,test}/node_N.
# For every person / node / view folder, one random image goes to test, one
# to validation, and the rest to train.
# NOTE(review): `random` is never seeded in this cell, so the split differs on
# every run.
# NOTE(review): files are copied under their original names and all four
# people share the same destination folders, so two source files with the
# same name would overwrite each other -- verify there are no collisions.

# Mount Google Drive unless /content/drive already exists
if not os.path.exists('/content/drive'):
    drive.mount('/content/drive')

# Source and destination roots
source_base_path = '/content/drive/My Drive/DLA_term_project_data/'
destination_base_path = '/content/drive/My Drive/DLA_term_project_data/classified_data/' # where the split data is written

# Create the destination tree: <split>/node_1 ... <split>/node_22
for dataset_type in ['train', 'validation', 'test']:
    for node_num in range(1, 23):
        node_folder = os.path.join(destination_base_path, dataset_type, f'node_{node_num}')
        os.makedirs(node_folder, exist_ok=True)

# People whose folders are processed
people = ['Barry', 'Billy', 'Edwin', 'Chan']

# Walk the folders and distribute the images
for person in people:
    person_path = os.path.join(source_base_path, person)
    for node_num in range(1, 23):
        node_path = os.path.join(person_path, f'node_{node_num}')
        for view in ['sea', 'up']:
            view_path = os.path.join(node_path, view)

            if os.path.exists(view_path):
                # Collect the image files (only .jpg / .heic, case-insensitive)
                all_files = [f for f in os.listdir(view_path) if f.lower().endswith(('.jpg', '.heic'))]

                if len(all_files) >= 2:
                    # Draw the test image, then the validation image, without replacement;
                    # whatever remains is the training set
                    test_image = random.choice(all_files)
                    all_files.remove(test_image)
                    validation_image = random.choice(all_files)
                    all_files.remove(validation_image)
                    train_images = all_files

                    # Copy the test image
                    src_test_path = os.path.join(view_path, test_image)
                    dest_test_folder = os.path.join(destination_base_path, 'test', f'node_{node_num}')
                    shutil.copy(src_test_path, dest_test_folder)
                    print(f'Copied test image: {test_image} to {dest_test_folder}')


                    # Copy the validation image
                    src_validation_path = os.path.join(view_path, validation_image)
                    dest_validation_folder = os.path.join(destination_base_path, 'validation', f'node_{node_num}')
                    shutil.copy(src_validation_path, dest_validation_folder)
                    print(f'Copied validation image: {validation_image} to {dest_validation_folder}')


                    # Copy the training images
                    dest_train_folder = os.path.join(destination_base_path, 'train', f'node_{node_num}')
                    for train_image in train_images:
                        src_train_path = os.path.join(view_path, train_image)
                        shutil.copy(src_train_path, dest_train_folder)
                        print(f'Copied train image: {train_image} to {dest_train_folder}')


                elif len(all_files) == 1:
                    # A single image cannot be split; it goes to train
                    train_image = all_files[0]
                    src_train_path = os.path.join(view_path, train_image)
                    dest_train_folder = os.path.join(destination_base_path, 'train', f'node_{node_num}')
                    shutil.copy(src_train_path, dest_train_folder)
                    print(f'Only one image found, copied as train image: {train_image} to {dest_train_folder}')

                else:
                    print(f'No images found in {view_path}')


print('圖片分類完成！')

Copied test image: IMG_5358.HEIC to /content/drive/My Drive/DLA_term_project_data/classified_data/test/node_1
Copied validation image: IMG_5360.HEIC to /content/drive/My Drive/DLA_term_project_data/classified_data/validation/node_1
Copied train image: IMG_5359.HEIC to /content/drive/My Drive/DLA_term_project_data/classified_data/train/node_1
Copied train image: IMG_5356.HEIC to /content/drive/My Drive/DLA_term_project_data/classified_data/train/node_1
Copied train image: IMG_5361.HEIC to /content/drive/My Drive/DLA_term_project_data/classified_data/train/node_1
Copied train image: IMG_5362.HEIC to /content/drive/My Drive/DLA_term_project_data/classified_data/train/node_1
Copied train image: IMG_5363.HEIC to /content/drive/My Drive/DLA_term_project_data/classified_data/train/node_1
Copied train image: IMG_5357.HEIC to /content/drive/My Drive/DLA_term_project_data/classified_data/train/node_1
Copied test image: IMG_5350.HEIC to /content/drive/My Drive/DLA_term_project_data/classified_dat

### 把heic檔轉成jpg

In [None]:
# Quick check: print every entry of the new_barry_data folder
folder_path = "/content/drive/MyDrive/new_barry_data"
import os

for file in os.listdir(folder_path):

    print(file)

In [10]:
!pip install pyheif pillow --quiet

import os
import pyheif
from PIL import Image

def convert_all_heic_in_dataset(root_folder):
    """Convert the HEIC files in ``root_folder/test/<node>/`` to JPEG, in place.

    Only the ``test`` split is processed by this variant. For each ``*.heic``
    file (extension matched case-insensitively) a ``.jpg`` with the same stem
    is written next to it and the HEIC original is then deleted. Files that
    cannot be converted are left untouched, reported on stdout, and their
    paths are written to ``failed_all.txt`` in the current working directory.

    Args:
        root_folder: Dataset root containing the split directory.

    Returns:
        None.
    """
    failed_files = []

    # Splits to process; a missing split directory is skipped silently.
    for split in ["test"]:
        split_path = os.path.join(root_folder, split)
        if not os.path.exists(split_path):
            continue

        for node_folder in os.listdir(split_path):
            node_path = os.path.join(split_path, node_folder)
            if not os.path.isdir(node_path):
                continue

            for file in os.listdir(node_path):
                if not file.lower().endswith(".heic"):
                    continue

                heic_path = os.path.join(node_path, file)
                jpg_path = os.path.splitext(heic_path)[0] + ".jpg"

                # Best effort: one unreadable file must not abort the batch,
                # so any failure is recorded and the loop carries on.
                try:
                    heif_file = pyheif.read(heic_path)
                    image = Image.frombytes(
                        heif_file.mode,
                        heif_file.size,
                        heif_file.data,
                        "raw",
                        heif_file.mode,
                        heif_file.stride,
                    )
                    # JPEG has no alpha channel; saving e.g. RGBA raises.
                    if image.mode != "RGB":
                        image = image.convert("RGB")
                    image.save(jpg_path, "JPEG")
                    print(f"✅ Converted: {heic_path}")
                    # The HEIC original is deleted only after the JPEG was written.
                    os.remove(heic_path)
                except Exception as e:
                    print(f"❌ Failed: {heic_path} — {str(e)}")
                    failed_files.append(heic_path)

    if failed_files:
        print("\n⚠️ Failed to convert the following files:")
        for failed_path in failed_files:
            print(f"- {failed_path}")
        with open("failed_all.txt", "w", encoding="utf-8") as log_file:
            for path in failed_files:
                log_file.write(path + "\n")


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/5.3 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━[0m [32m4.8/5.3 MB[0m [31m145.4 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.3/5.3 MB[0m [31m89.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [11]:
# Convert the HEIC files of the new_barry_data folder
convert_all_heic_in_dataset("/content/drive/MyDrive/new_barry_data")


In [None]:
!pip install pyheif pillow --quiet

import os
import pyheif
from PIL import Image

def convert_all_heic_in_dataset(root_folder):
    """Convert the HEIC files of every split under ``root_folder`` to JPEG.

    Processes ``train``, ``validation`` and ``test``. For each ``*.heic`` file
    (extension matched case-insensitively) in ``<split>/<node>/`` a ``.jpg``
    with the same stem is written next to it. The HEIC originals are kept.
    Files that cannot be converted are reported on stdout and their paths are
    written to ``failed_all.txt`` in the current working directory.

    Args:
        root_folder: Dataset root containing the split directories.

    Returns:
        None.
    """
    failed_files = []

    # Splits to process; a missing split directory is skipped silently.
    for split in ["train", "validation", "test"]:
        split_path = os.path.join(root_folder, split)
        if not os.path.exists(split_path):
            continue

        for node_folder in os.listdir(split_path):
            node_path = os.path.join(split_path, node_folder)
            if not os.path.isdir(node_path):
                continue

            for file in os.listdir(node_path):
                if not file.lower().endswith(".heic"):
                    continue

                heic_path = os.path.join(node_path, file)
                jpg_path = os.path.splitext(heic_path)[0] + ".jpg"

                # Best effort: one unreadable file must not abort the batch,
                # so any failure is recorded and the loop carries on.
                try:
                    heif_file = pyheif.read(heic_path)
                    image = Image.frombytes(
                        heif_file.mode,
                        heif_file.size,
                        heif_file.data,
                        "raw",
                        heif_file.mode,
                        heif_file.stride,
                    )
                    # JPEG has no alpha channel; saving e.g. RGBA raises.
                    if image.mode != "RGB":
                        image = image.convert("RGB")
                    image.save(jpg_path, "JPEG")
                    print(f"✅ Converted: {heic_path}")
                    # The HEIC original is intentionally left in place here.
                except Exception as e:
                    print(f"❌ Failed: {heic_path} — {str(e)}")
                    failed_files.append(heic_path)

    if failed_files:
        print("\n⚠️ Failed to convert the following files:")
        for failed_path in failed_files:
            print(f"- {failed_path}")
        with open("failed_all.txt", "w", encoding="utf-8") as log_file:
            for path in failed_files:
                log_file.write(path + "\n")


In [None]:
# Convert the HEIC files of the classified_data folder
convert_all_heic_in_dataset("/content/drive/MyDrive/DLA_term_project_data/classified_data")


✅ Converted: /content/drive/MyDrive/DLA_term_project_data/classified_data/train/node_1/IMG_5359.HEIC
✅ Converted: /content/drive/MyDrive/DLA_term_project_data/classified_data/train/node_1/IMG_5356.HEIC
✅ Converted: /content/drive/MyDrive/DLA_term_project_data/classified_data/train/node_1/IMG_5361.HEIC
✅ Converted: /content/drive/MyDrive/DLA_term_project_data/classified_data/train/node_1/IMG_5362.HEIC
✅ Converted: /content/drive/MyDrive/DLA_term_project_data/classified_data/train/node_1/IMG_5363.HEIC
✅ Converted: /content/drive/MyDrive/DLA_term_project_data/classified_data/train/node_1/IMG_5357.HEIC
✅ Converted: /content/drive/MyDrive/DLA_term_project_data/classified_data/train/node_1/IMG_5355.HEIC
✅ Converted: /content/drive/MyDrive/DLA_term_project_data/classified_data/train/node_1/IMG_5351.HEIC
✅ Converted: /content/drive/MyDrive/DLA_term_project_data/classified_data/train/node_1/IMG_5349.HEIC
✅ Converted: /content/drive/MyDrive/DLA_term_project_data/classified_data/train/node_1/IMG_

### JPG轉jpg

In [None]:
import os

def rename_all_jpg_to_lowercase(folder_path):
    """Rename every ``*.JPG`` file below ``folder_path`` to ``*.jpg``.

    The tree is walked recursively. Only names ending in exactly ``.JPG`` are
    touched. A file is skipped (with a message) when the lower-case target
    name already exists, so an existing ``.jpg`` is never overwritten.

    Args:
        folder_path: Root directory to walk.

    Returns:
        None.
    """
    for root, _, files in os.walk(folder_path):
        for file in files:
            if not file.endswith(".JPG"):
                continue

            new_filename = file[:-4] + ".jpg"
            old_path = os.path.join(root, file)
            new_path = os.path.join(root, new_filename)

            # os.rename() silently replaces an existing target on POSIX,
            # which would destroy a different image with the same stem.
            if os.path.exists(new_path):
                print(f"⚠️ Skipped (already exists): {new_filename}")
                continue

            os.rename(old_path, new_path)
            print(f"🔄 Renamed: {file} → {new_filename}")


In [None]:
# Lower-case the .JPG extensions in the classified_data folder
rename_all_jpg_to_lowercase("/content/drive/MyDrive/DLA_term_project_data/classified_data")


🔄 Renamed: IMG_3946.JPG → IMG_3946.jpg
🔄 Renamed: IMG_3956.JPG → IMG_3956.jpg
🔄 Renamed: IMG_3953.JPG → IMG_3953.jpg
🔄 Renamed: IMG_3950.JPG → IMG_3950.jpg
🔄 Renamed: IMG_3962.JPG → IMG_3962.jpg
🔄 Renamed: IMG_3960.JPG → IMG_3960.jpg
🔄 Renamed: IMG_3936.JPG → IMG_3936.jpg
🔄 Renamed: IMG_3930.JPG → IMG_3930.jpg
🔄 Renamed: IMG_3932.JPG → IMG_3932.jpg
🔄 Renamed: IMG_3940.JPG → IMG_3940.jpg
🔄 Renamed: IMG_3988.JPG → IMG_3988.jpg
🔄 Renamed: IMG_3982.JPG → IMG_3982.jpg
🔄 Renamed: IMG_3986.JPG → IMG_3986.jpg
🔄 Renamed: IMG_3984.JPG → IMG_3984.jpg
🔄 Renamed: IMG_3992.JPG → IMG_3992.jpg
🔄 Renamed: IMG_3994.JPG → IMG_3994.jpg
🔄 Renamed: IMG_3976.JPG → IMG_3976.jpg
🔄 Renamed: IMG_3974.JPG → IMG_3974.jpg
🔄 Renamed: IMG_3970.JPG → IMG_3970.jpg
🔄 Renamed: IMG_3971.JPG → IMG_3971.jpg
🔄 Renamed: IMG_3967.JPG → IMG_3967.jpg
🔄 Renamed: IMG_3980.JPG → IMG_3980.jpg
🔄 Renamed: IMG_4034.JPG → IMG_4034.jpg
🔄 Renamed: IMG_4033.JPG → IMG_4033.jpg
🔄 Renamed: IMG_4032.JPG → IMG_4032.jpg
🔄 Renamed: IMG_4029.JPG →