In [None]:
import os

# Root directory of the dataset (change this to your own folder path).
dataset_root = '/home/ne6131039/Desktop/TEM_DATAS/TEM_classification'

# Names of the per-class sub-folders expected under the root.
categories = ['CTEM', 'HR-TEM', 'STEM', 'SEM', 'None', 'Diffraction']

# Report how many image files each class folder contains.
for category in categories:
    folder_path = os.path.join(dataset_root, category)

    # Guard clause: report a missing folder and move on to the next class.
    if not os.path.exists(folder_path):
        print(f'{category:12s}: 資料夾不存在')
        continue

    # Count regular files whose extension (case-insensitive) is an image type.
    file_count = sum(
        1
        for entry in os.listdir(folder_path)
        if entry.lower().endswith(('.png', '.jpg', '.jpeg'))
        and os.path.isfile(os.path.join(folder_path, entry))
    )
    print(f'{category:12s}: {file_count} images')


In [None]:
import os
from PIL import Image
from torchvision import transforms
import random

# Categories that receive extra augmented samples.
augment_categories = ['HR-TEM', 'STEM', 'Diffraction']
dataset_root = '/home/ne6131039/Desktop/TEM_DATAS/TEM_classification'

# Number of augmented variants generated per source image.
augment_count = 3

# Random augmentation pipeline (tune as needed). It operates on PIL images
# and returns a PIL image, so the result can be saved directly.
augmentation = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomRotation(degrees=30),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.RandomResizedCrop(size=(224, 224), scale=(0.8, 1.0)),  # could be swapped for Resize
])


def is_augmented_file(filename):
    """Return True when `filename` looks like an output of this cell.

    Outputs are named `<stem>_aug<N>.jpg`. Because they are written into the
    same folder that is scanned for inputs, they must be recognised and skipped;
    otherwise re-running the cell would augment the augmented images again.
    """
    stem = os.path.splitext(filename)[0]
    head, sep, tail = stem.rpartition('_aug')
    return bool(head and sep and tail.isdigit())


for category in augment_categories:
    folder_path = os.path.join(dataset_root, category)

    # Report a missing class folder instead of crashing in os.listdir.
    if not os.path.isdir(folder_path):
        print(f'{category:12s}: 資料夾不存在')
        continue

    # Only original images are augmented; sorted for a stable processing order.
    images = sorted(
        f for f in os.listdir(folder_path)
        if f.lower().endswith(('.jpg', '.jpeg', '.png')) and not is_augmented_file(f)
    )

    for image_name in images:
        image_path = os.path.join(folder_path, image_name)
        try:
            # The context manager closes the underlying file handle; convert()
            # returns an independent in-memory copy that stays usable afterwards.
            with Image.open(image_path) as source_image:
                image = source_image.convert('RGB')
        except Exception as e:
            # Best effort: an unreadable file is reported and skipped.
            print(f"無法讀取 {image_name}: {e}")
            continue

        for i in range(augment_count):
            aug_img = augmentation(image)
            new_name = f"{os.path.splitext(image_name)[0]}_aug{i}.jpg"
            new_path = os.path.join(folder_path, new_name)
            aug_img.save(new_path)

        print(f"✅ 已增強: {image_name} -> {augment_count} 張")


In [None]:
import os
import shutil
import random

# Source dataset: one sub-folder per category, all six classes side by side.
src_root = '/home/ne6131039/Desktop/TEM_DATAS/TEM_classification'

# Destination root; files are copied to <dst_root>/<split>/<category>/.
dst_root = '/home/ne6131039/Desktop/TEM_DATAS/TEM_split'
splits = ['train', 'val', 'test']
split_ratio = {'train': 0.7, 'val': 0.15, 'test': 0.15}

categories = ['CTEM', 'HR-TEM', 'STEM', 'SEM', 'None', 'Diffraction']

# Fixed seed: re-running the cell reproduces the same partition, so a second
# run overwrites the same files instead of leaking images into other splits.
split_seed = 42


def source_stem(filename):
    """Return the stem of the image that `filename` was derived from.

    Files named `<stem>_aug<N>.<ext>` map to `<stem>`; any other file maps to
    its own stem.
    """
    stem = os.path.splitext(filename)[0]
    head, sep, tail = stem.rpartition('_aug')
    if head and sep and tail.isdigit():
        return head
    return stem


def assign_splits(filenames, ratios, rng):
    """Partition `filenames` into train/val/test without splitting a group.

    Files sharing the same `source_stem` (an image and its `_aug<N>` variants)
    always land in the same split, so near-duplicates of a training image
    cannot end up in the validation or test set.

    Args:
        filenames: iterable of file names (not paths).
        ratios: mapping with 'train' and 'val' fractions; 'test' receives the
            remaining groups.
        rng: object with a `shuffle(list)` method, e.g. `random.Random(seed)`.

    Returns:
        dict mapping 'train', 'val' and 'test' to lists of file names.
    """
    groups = {}
    for name in sorted(filenames):
        groups.setdefault(source_stem(name), []).append(name)

    # Sort before shuffling so the outcome depends only on the seed, not on
    # the arbitrary order in which the file system lists entries.
    keys = sorted(groups)
    rng.shuffle(keys)

    train_end = int(len(keys) * ratios['train'])
    val_end = train_end + int(len(keys) * ratios['val'])

    assignment = {'train': [], 'val': [], 'test': []}
    for i, key in enumerate(keys):
        if i < train_end:
            split = 'train'
        elif i < val_end:
            split = 'val'
        else:
            split = 'test'
        assignment[split].extend(groups[key])
    return assignment


# Split every category independently.
for category in categories:
    src_folder = os.path.join(src_root, category)
    if not os.path.isdir(src_folder):
        print(f'{category:12s}: 資料夾不存在')
        continue

    images = [f for f in os.listdir(src_folder) if f.lower().endswith(('.jpg', '.png', '.jpeg'))]

    # One generator per category keeps a category's split stable even when the
    # contents of another category change.
    assignment = assign_splits(images, split_ratio, random.Random(f"{split_seed}:{category}"))

    for split in splits:
        dst_folder = os.path.join(dst_root, split, category)
        os.makedirs(dst_folder, exist_ok=True)
        for img in assignment[split]:
            shutil.copy2(os.path.join(src_folder, img), os.path.join(dst_folder, img))

    print(f"{category:12s}: train={len(assignment['train'])}, val={len(assignment['val'])}, test={len(assignment['test'])}")


In [None]:
import os
import shutil

# Input: the six-class dataset already divided into train/val/test.
src_root = '/home/ne6131039/Desktop/TEM_DATAS/TEM_split'
# Output: the same splits collapsed into two classes (None / NotNone).
dst_root = '/home/ne6131039/Desktop/TEM_DATAS/TEM_binary'

# The three partitions that are mirrored into the binary dataset.
splits = ['train', 'val', 'test']
none_label = 'None'
all_categories = ['CTEM', 'HR-TEM', 'STEM', 'SEM', 'Diffraction', 'None']

# Create <dst_root>/<split>/<None|NotNone> up front.
for split in splits:
    for label in ('None', 'NotNone'):
        os.makedirs(os.path.join(dst_root, split, label), exist_ok=True)

# Copy every image into its binary class folder. Files are copied, not moved,
# so the six-class dataset stays intact.
for split in splits:
    for category in all_categories:
        src_folder = os.path.join(src_root, split, category)
        if os.path.exists(src_folder):
            # Everything except the `none_label` category is folded into 'NotNone'.
            target_label = 'NotNone' if category != none_label else 'None'
            dst_folder = os.path.join(dst_root, split, target_label)

            image_names = [
                entry for entry in os.listdir(src_folder)
                if entry.lower().endswith(('.jpg', '.jpeg', '.png'))
            ]
            for fname in image_names:
                src_path = os.path.join(src_folder, fname)
                # Prefix with the original category so equally named files from
                # different categories do not overwrite each other.
                dst_path = os.path.join(dst_folder, f"{category}_{fname}")
                shutil.copy2(src_path, dst_path)

            print(f"[{split}] {category} -> {target_label}, 完成搬移")

print("✅ 二分類資料集產生完成！")

In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import StepLR
from tqdm import tqdm
import matplotlib.pyplot as plt

# ====== Hyperparameters and paths ======
data_root = '/home/ne6131039/Desktop/TEM_DATAS/TEM_binary'
batch_size = 32
num_epochs = 50
initial_lr = 0.0001
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ====== Image transforms ======
# Normalize with mean 0.5 / std 0.5 per channel applies (x - 0.5) / 0.5.
# NOTE(review): ImageNet-pretrained weights are usually paired with the
# ImageNet mean/std; the values are kept because the evaluation cell of this
# notebook uses the same normalization and both must agree.
transform = {
    'train': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.5]*3, [0.5]*3)
    ]),
    'val': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.5]*3, [0.5]*3)
    ])
}

# ====== Datasets and loaders ======
datasets_binary = {
    x: datasets.ImageFolder(os.path.join(data_root, x), transform=transform[x])
    for x in ['train', 'val']
}
# Only the training loader is shuffled; validation order does not affect accuracy.
dataloaders = {
    x: DataLoader(datasets_binary[x], batch_size=batch_size, shuffle=(x == 'train'), num_workers=4)
    for x in ['train', 'val']
}
class_names = datasets_binary['train'].classes
print("類別標籤對應：", class_names)

# ====== Model: ResNet-18 with a 2-way classification head ======
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, 2)
model = model.to(device)

# ====== Loss, optimizer and LR schedule ======
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=initial_lr)
# The learning rate is multiplied by 0.1 every 5 epochs.
scheduler = StepLR(optimizer, step_size=5, gamma=0.1)

# ====== Per-epoch history ======
loss_history = []
train_acc_history = []
val_acc_history = []
lr_history = []
best_val_acc = 0.0
best_model_path = "best_binary_classifier.pth"

# ====== Training loop ======
for epoch in range(num_epochs):
    print(f"\nEpoch {epoch+1}/{num_epochs}")
    model.train()
    running_loss, running_corrects, total = 0.0, 0, 0

    for inputs, labels in tqdm(dataloaders['train'], desc="Training"):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        preds = outputs.argmax(dim=1)
        # loss.item() is the batch mean, so weight it by the batch size.
        running_loss += loss.item() * inputs.size(0)
        # Accumulate plain Python ints so all metrics below are plain floats.
        running_corrects += (preds == labels).sum().item()
        total += labels.size(0)

    train_loss = running_loss / total
    train_acc = running_corrects / total

    # Validation pass: no gradients, eval-mode layers.
    model.eval()
    val_corrects, val_total = 0, 0
    with torch.no_grad():
        for inputs, labels in dataloaders['val']:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            preds = outputs.argmax(dim=1)
            val_corrects += (preds == labels).sum().item()
            val_total += labels.size(0)

    val_acc = val_corrects / val_total
    # Read the LR before scheduler.step() so the log shows the rate used this epoch.
    current_lr = optimizer.param_groups[0]['lr']

    # Keep only the checkpoint with the best validation accuracy so far.
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), best_model_path)
        print(f"✅ 儲存最佳模型 (Val Acc: {val_acc:.2%})")

    # Record metrics for the plots below.
    loss_history.append(train_loss)
    train_acc_history.append(train_acc)
    val_acc_history.append(val_acc)
    lr_history.append(current_lr)

    scheduler.step()
    # Scientific notation: after a few decay steps the LR is too small for a
    # fixed six-decimal format and would print as 0.000000.
    print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2%} | Val Acc: {val_acc:.2%} | LR: {current_lr:.2e}")

# ====== Plot and save the training curves ======
# Derive the x-axis from the recorded history so it always matches its length.
epochs = list(range(1, len(loss_history) + 1))
fig, axs = plt.subplots(4, 1, figsize=(8, 12))

axs[0].plot(epochs, loss_history, color='red', marker='o')
axs[0].set_title("Training Loss over Epochs")
axs[0].set_xlabel("Epoch")
axs[0].set_ylabel("Loss")
axs[0].grid(True)

axs[1].plot(epochs, train_acc_history, color='orange', marker='o')
axs[1].set_title("Training Accuracy over Epochs")
axs[1].set_xlabel("Epoch")
axs[1].set_ylabel("Accuracy")
axs[1].grid(True)

axs[2].plot(epochs, val_acc_history, color='blue', marker='o')
axs[2].set_title("Validation Accuracy over Epochs")
axs[2].set_xlabel("Epoch")
axs[2].set_ylabel("Accuracy")
axs[2].grid(True)

axs[3].plot(epochs, lr_history, color='green', marker='o')
axs[3].set_title("Learning Rate over Epochs")
axs[3].set_xlabel("Epoch")
axs[3].set_ylabel("Learning Rate")
axs[3].grid(True)

plt.tight_layout()
plt.savefig("training_metrics_summary.png")
print("📊 已儲存訓練過程圖：training_metrics_summary.png")
plt.show()


In [None]:
import os
import torch
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# ====== Paths and settings ======
test_dir = '/home/ne6131039/Desktop/TEM_DATAS/TEM_binary/test'
model_path = 'best_binary_classifier.pth'
batch_size = 32
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# ====== Preprocessing (same resize and normalization as the 'val' transform used in training) ======
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3)
])

# ====== Test dataset ======
test_dataset = datasets.ImageFolder(test_dir, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
class_names = test_dataset.classes
print("分類標籤：", class_names)

# ====== Rebuild the architecture and load the trained weights ======
model = models.resnet18(pretrained=False)
model.fc = torch.nn.Linear(model.fc.in_features, 2)
# map_location lets a checkpoint saved from a CUDA model load on a CPU-only host.
model.load_state_dict(torch.load(model_path, map_location=device))
model.to(device)
model.eval()

# ====== Run inference and collect predictions ======
all_preds = []
all_labels = []

with torch.no_grad():
    for images, labels in test_loader:
        # Labels are only compared on the CPU, so they never need to leave it.
        outputs = model(images.to(device))
        preds = outputs.argmax(dim=1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.numpy())

# ====== Overall accuracy ======
all_preds = np.array(all_preds)
all_labels = np.array(all_labels)
acc = (all_preds == all_labels).sum() / len(all_labels)
print(f"✅ 測試準確率：{acc:.2%}")

# ====== Confusion matrix and report ======
# Pass the full label set explicitly so rows/columns always line up with
# class_names, even when a class is absent from the labels or predictions.
label_ids = list(range(len(class_names)))
cm = confusion_matrix(all_labels, all_preds, labels=label_ids)
plt.figure(figsize=(5, 4))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix (Test Set)')
plt.tight_layout()
plt.savefig("test_confusion_matrix.png")
plt.show()

print("\n📋 詳細分類報告:")
print(classification_report(all_labels, all_preds, labels=label_ids, target_names=class_names))


In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import StepLR
from tqdm import tqdm
import matplotlib.pyplot as plt

# ====== Hyperparameters and paths ======
data_root = '/home/ne6131039/Desktop/TEM_DATAS/TEM_split'
batch_size = 32
num_epochs = 50
initial_lr = 0.0001
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ====== Image transforms ======
# Normalize with mean 0.5 / std 0.5 per channel applies (x - 0.5) / 0.5.
# The evaluation cell of this notebook uses the same values; keep them in sync.
transform = {
    'train': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.5]*3, [0.5]*3)
    ]),
    'val': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.5]*3, [0.5]*3)
    ])
}

# ====== Datasets and loaders ======
datasets_multi = {
    x: datasets.ImageFolder(os.path.join(data_root, x), transform=transform[x])
    for x in ['train', 'val']
}
# Only the training loader is shuffled; validation order does not affect accuracy.
dataloaders = {
    x: DataLoader(datasets_multi[x], batch_size=batch_size, shuffle=(x == 'train'), num_workers=4)
    for x in ['train', 'val']
}
class_names = datasets_multi['train'].classes
# NOTE(review): messages and file names below say "five-class", but the number
# of outputs is taken from the class folders found under <data_root>/train.
# If a 'None' folder is still present there, this trains a six-way classifier.
# Confirm which is intended.
num_classes = len(class_names)
print("五類標籤：", class_names)

# ====== Model: ResNet-18 with a num_classes-way head ======
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)

# ====== Loss, optimizer and LR schedule ======
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=initial_lr)
# The learning rate is multiplied by 0.1 every 5 epochs.
scheduler = StepLR(optimizer, step_size=5, gamma=0.1)

# ====== Per-epoch history ======
loss_history = []
train_acc_history = []
val_acc_history = []
lr_history = []
best_val_acc = 0.0
best_model_path = "best_five_class_classifier.pth"

# ====== Training loop ======
for epoch in range(num_epochs):
    print(f"\nEpoch {epoch+1}/{num_epochs}")
    model.train()
    running_loss, running_corrects, total = 0.0, 0, 0

    for inputs, labels in tqdm(dataloaders['train'], desc="Training"):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        preds = outputs.argmax(dim=1)
        # loss.item() is the batch mean, so weight it by the batch size.
        running_loss += loss.item() * inputs.size(0)
        # Accumulate plain Python ints so all metrics below are plain floats.
        running_corrects += (preds == labels).sum().item()
        total += labels.size(0)

    train_loss = running_loss / total
    train_acc = running_corrects / total

    # Validation pass: no gradients, eval-mode layers.
    model.eval()
    val_corrects, val_total = 0, 0
    with torch.no_grad():
        for inputs, labels in dataloaders['val']:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            preds = outputs.argmax(dim=1)
            val_corrects += (preds == labels).sum().item()
            val_total += labels.size(0)

    val_acc = val_corrects / val_total
    # Read the LR before scheduler.step() so the log shows the rate used this epoch.
    current_lr = optimizer.param_groups[0]['lr']

    # Keep only the checkpoint with the best validation accuracy so far.
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), best_model_path)
        print(f"✅ 儲存最佳模型 (Val Acc: {val_acc:.2%})")

    loss_history.append(train_loss)
    train_acc_history.append(train_acc)
    val_acc_history.append(val_acc)
    lr_history.append(current_lr)

    scheduler.step()
    # Scientific notation: after a few decay steps the LR is too small for a
    # fixed six-decimal format and would print as 0.000000.
    print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2%} | Val Acc: {val_acc:.2%} | LR: {current_lr:.2e}")

# ====== Plot and save the training curves ======
# Derive the x-axis from the recorded history so it always matches its length.
epochs = list(range(1, len(loss_history) + 1))
fig, axs = plt.subplots(4, 1, figsize=(8, 12))

axs[0].plot(epochs, loss_history, color='red', marker='o')
axs[0].set_title("Training Loss over Epochs")
axs[0].set_ylabel("Loss")

axs[1].plot(epochs, train_acc_history, color='orange', marker='o')
axs[1].set_title("Training Accuracy over Epochs")
axs[1].set_ylabel("Accuracy")

axs[2].plot(epochs, val_acc_history, color='blue', marker='o')
axs[2].set_title("Validation Accuracy over Epochs")
axs[2].set_ylabel("Accuracy")

axs[3].plot(epochs, lr_history, color='green', marker='o')
axs[3].set_title("Learning Rate over Epochs")
axs[3].set_ylabel("Learning Rate")

# Shared cosmetics for all four panels.
for ax in axs:
    ax.set_xlabel("Epoch")
    ax.grid(True)

plt.tight_layout()
plt.savefig("five_class_training_metrics.png")
print("📊 已儲存五類訓練圖：five_class_training_metrics.png")
plt.show()


In [None]:
import os
import torch
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# ====== Paths and settings ======
test_dir = '/home/ne6131039/Desktop/TEM_DATAS/TEM_split/test'
model_path = 'best_five_class_classifier.pth'
batch_size = 32
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# ====== Preprocessing (same resize and normalization as the 'val' transform used in training) ======
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3)
])

# ====== Test dataset ======
test_dataset = datasets.ImageFolder(test_dir, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
class_names = test_dataset.classes
# The head size is derived from the test folders; it must equal the number of
# classes the checkpoint was trained with, or load_state_dict will fail.
num_classes = len(class_names)
print("測試類別順序：", class_names)

# ====== Rebuild the architecture and load the trained weights ======
model = models.resnet18(pretrained=False)
model.fc = torch.nn.Linear(model.fc.in_features, num_classes)
# map_location lets a checkpoint saved from a CUDA model load on a CPU-only host.
model.load_state_dict(torch.load(model_path, map_location=device))
model = model.to(device)
model.eval()

# ====== Run inference over the whole test set ======
all_preds = []
all_labels = []

with torch.no_grad():
    for inputs, labels in test_loader:
        # Labels are only compared on the CPU, so they never need to leave it.
        outputs = model(inputs.to(device))
        preds = outputs.argmax(dim=1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.numpy())

all_preds = np.array(all_preds)
all_labels = np.array(all_labels)
acc = (all_preds == all_labels).sum() / len(all_labels)
print(f"\n✅ 測試準確率：{acc:.2%}")

# ====== Confusion matrix ======
# Pass the full label set explicitly so rows/columns always line up with
# class_names, even when a class is absent from the labels or predictions.
label_ids = list(range(num_classes))
cm = confusion_matrix(all_labels, all_preds, labels=label_ids)
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix (Five-Class Test)')
plt.tight_layout()
plt.savefig("five_class_test_confusion_matrix.png")
plt.show()

# ====== Detailed report ======
print("\n📋 詳細分類報告：")
print(classification_report(all_labels, all_preds, labels=label_ids, target_names=class_names))
