這個 HW3_4 使用原本的 ResNet-18，並針對 good 以外的類別進行額外的資料強化（資料增強）。

In [None]:
import glob
import matplotlib.pyplot as plt
import random
from tqdm.auto import tqdm
import cv2
import numpy as np

In [None]:
import os

def collect_image_label_pairs(base_path):
    """Collect ``(path, label)`` pairs for every ``.png`` file under *base_path*.

    The label is 0 when the name of the folder that directly contains the
    image includes "good" (case-insensitive) and 1 otherwise.

    Folders and files are visited in sorted order so the returned list is the
    same on every run; ``os.walk`` by itself gives no ordering guarantee.

    Args:
        base_path: Root directory to scan recursively.

    Returns:
        A list of ``(full_path, label)`` tuples. Empty when *base_path* does
        not exist or holds no ``.png`` files.
    """
    pairs = []
    for root, dirs, files in os.walk(base_path):
        # Sorting in place controls the order in which os.walk descends.
        dirs.sort()
        for file in sorted(files):
            if not file.endswith(".png"):
                continue
            full_path = os.path.join(root, file)
            class_folder = os.path.basename(os.path.dirname(full_path))
            label = 0 if "good" in class_folder.lower() else 1
            pairs.append((full_path, label))
    return pairs


base_path = "/Users/adam0725/1332/1233/Pratice/train"

image_label_pairs = collect_image_label_pairs(base_path)

# Print a short summary as a sanity check.
print("共載入圖片數量：", len(image_label_pairs))
print("前 3 筆：", image_label_pairs[:3])

In [None]:
import cv2
import torch
import numpy as np
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split

# Load every image (resized to 224x224, RGB) together with its binary label.
# all_images / all_labels stay index-aligned with image_label_pairs.
all_images = []
all_labels = []

for path, label in image_label_pairs:
    img = cv2.imread(path)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with the offending path instead of inside cv2.resize.
        raise OSError(f"Failed to read image: {path}")
    img = cv2.resize(img, (224, 224))
    img = img[..., ::-1]  # BGR -> RGB
    # Pixels are intentionally left as uint8 in the 0-255 range (no /255 scaling).
    all_images.append(img)
    all_labels.append(label)

# Convert to tensors.
x = torch.tensor(np.array(all_images)).permute(0, 3, 1, 2)  # N, C, H, W
y = torch.tensor(all_labels).float().unsqueeze(1)  # N, 1

# Stratified 70/30 train/validation split on the binary labels.
# NOTE: x_train / x_val / y_train / y_val, the datasets and the loaders built
# below are reassigned by later cells in this file.
x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.3, random_state=42, stratify=y)

# Wrap the splits in DataLoaders.
train_dataset = TensorDataset(x_train, y_train)
val_dataset = TensorDataset(x_val, y_val)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [None]:
import matplotlib.pyplot as plt


# Folder name of every image, index-aligned with image_label_pairs (and so
# with all_images, which was filled in the same order).
image_folders = [os.path.basename(os.path.dirname(p)) for p, _ in image_label_pairs]

class_names = sorted(set(image_folders))
print(f'Classes: {class_names}')

img_per_class = 5
# squeeze=False keeps axs two-dimensional even when there is a single class,
# so axs[i, j] is always valid.
fig, axs = plt.subplots(
    len(class_names), img_per_class, figsize=(6, 4 * len(class_names)), squeeze=False
)

for i, class_name in enumerate(class_names):
    # Match on the exact folder name: a substring test against the full path
    # would also match other folders or parent directories containing the name.
    class_indices = [idx for idx, folder in enumerate(image_folders) if folder == class_name]

    for j in range(img_per_class):
        ax = axs[i, j]
        ax.axis('off')
        if j >= len(class_indices):
            # Fewer images than columns for this class: leave the cell blank.
            continue
        ax.imshow(all_images[class_indices[j]])
        ax.set_title(f'{class_name} [{j}]')

plt.tight_layout()
plt.show()

In [None]:
import torch
from torch.utils.data import DataLoader, TensorDataset, random_split
import numpy as np

# Position-based per-class split of all_images into train / validation sets.
# NOTE: this assumes all_images is grouped by class, with images_per_class
# consecutive entries per class, and that the i-th group belongs to label i.
num_classes = 5
images_per_class = 10
train_images_per_class = int(images_per_class * 0.8)
# Everything not used for training goes to validation. Deriving the count as
# the remainder keeps y_val the same length as x_val for any images_per_class
# (int(n * 0.2) can be smaller than the number of images actually sliced).
val_images_per_class = images_per_class - train_images_per_class

required_images = num_classes * images_per_class
if len(all_images) < required_images:
    raise ValueError(
        f'Expected at least {required_images} images '
        f'({num_classes} classes x {images_per_class}), got {len(all_images)}'
    )

x_train = []
x_val = []

for i in range(num_classes):
    start_index = i * images_per_class
    split_index = start_index + train_images_per_class
    x_train.extend(all_images[start_index:split_index])
    x_val.extend(all_images[split_index:start_index + images_per_class])

# The shape changes from (batch_size, height, width, channels) to (batch_size, channels, height, width)
x_train = np.transpose(np.array(x_train), (0, 3, 1, 2))
x_val = np.transpose(np.array(x_val), (0, 3, 1, 2))

# Integer labels 0..num_classes-1, repeated once per image in each class group.
y_train = np.concatenate([np.full(train_images_per_class, i) for i in range(num_classes)])
y_val = np.concatenate([np.full(val_images_per_class, i) for i in range(num_classes)])

print(f'Shape of x_train: {x_train.shape}')
print(f'Shape of x_val: {x_val.shape}')
print(f'Shape of y_train: {y_train.shape}')
print(f'Shape of y_val: {y_val.shape}')

In [None]:
from torchvision import transforms
from torch.utils.data import Dataset
from PIL import Image
import numpy as np
import torch

# Per-channel statistics used to normalize inputs for torchvision's
# ImageNet-pretrained weights; applied in both pipelines so the pretrained
# ResNet-18 sees inputs in the value range it was trained on.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Conservative pipeline for the "good" class: resize, tensor conversion and
# normalization only, no random augmentation.
good_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
])

# Stronger random augmentation for the defect classes.
defect_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomRotation(30),
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3),
    transforms.RandomPerspective(distortion_scale=0.3, p=0.5),
    transforms.RandomAffine(degrees=20, shear=10),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
])

class MyDataset(Dataset):
    """Image dataset that picks a transform pipeline based on the sample's class.

    Samples whose class name is 'good' go through ``good_transforms``; every
    other class goes through ``defect_transforms`` while ``augment`` is true.

    Args:
        x: Indexable collection of channel-first images, one ``(3, 224, 224)``
            array per sample. Integer arrays are taken to be 0-255 pixel
            values already; floating-point arrays are assumed to be in
            [0, 1] and are scaled by 255.
        y: NumPy array of integer class indices, one per image.
        class_names: Sequence mapping a class index to its name,
            e.g. ['bent_2', 'color_2', 'flip_2', 'good', 'scratch_2'].
        augment: When False, every class uses ``good_transforms`` so no random
            augmentation is applied (intended for validation/test data).
            Defaults to True, which keeps the per-class behavior.
    """

    def __init__(self, x, y, class_names, augment=True):
        self.x = x
        self.y = torch.from_numpy(y).long()
        self.class_names = class_names
        self.augment = augment

    def __len__(self):
        return len(self.x)

    @staticmethod
    def _to_uint8_hwc(img):
        """Return a channel-first image as a contiguous uint8 (H, W, C) array."""
        hwc = np.transpose(np.asarray(img), (1, 2, 0))
        if hwc.dtype == np.uint8:
            # Already 0-255 pixels. Multiplying uint8 data by 255 would wrap
            # around modulo 256 and corrupt the image.
            return np.ascontiguousarray(hwc)
        if np.issubdtype(hwc.dtype, np.floating):
            # Floating-point input is assumed to be normalized to [0, 1].
            hwc = hwc * 255
        return np.ascontiguousarray(np.clip(hwc, 0, 255).astype('uint8'))

    def __getitem__(self, idx):
        img_array = self._to_uint8_hwc(self.x[idx])

        if img_array.shape != (224, 224, 3):
            # Best-effort fallback: warn and substitute a black image.
            print(f'[WARNING] Unexpected shape: {img_array.shape} (index {idx})')
            img_array = np.zeros((224, 224, 3), dtype='uint8')

        image = Image.fromarray(img_array)

        label = int(self.y[idx])
        class_name = self.class_names[label]

        # Choose the pipeline by class; augmentation can be switched off.
        if self.augment and class_name != 'good':
            image = defect_transforms(image)
        else:
            image = good_transforms(image)

        return image, label



In [None]:
batch_size = 8

# Class index -> class name; MyDataset looks names up by integer label, so the
# order here has to match the label values stored in y_train / y_val.
class_names = ['bent_2', 'color_2', 'flip_2', 'good', 'scratch_2']

# Build both datasets the same way from their (images, labels) pair.
train_dataset, val_dataset = (
    MyDataset(images, targets, class_names=class_names)
    for images, targets in ((x_train, y_train), (x_val, y_val))
)

# Only the training loader shuffles; both load in the main process.
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
    pin_memory=False,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=0,
    pin_memory=False,
)

In [None]:
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models

# ResNet-18 with ImageNet-pretrained weights.
model = models.resnet18(weights='IMAGENET1K_V1')

# Use the network as a fixed feature extractor: switch off gradients for
# every existing parameter.
model.requires_grad_(False)

num_ftrs = model.fc.in_features
num_class = 5

# Replace the original classification layer with a small head that outputs
# num_class logits. The new layers are created after the freeze, so they keep
# the default requires_grad=True.
hidden_layer = nn.Linear(num_ftrs, 256)
output_layer = nn.Linear(256, num_class)
model.fc = nn.Sequential(hidden_layer, nn.ReLU(), nn.Dropout(0.3), output_layer)

In [None]:
import torch.optim as optim
from torch.optim.lr_scheduler import CosineAnnealingLR, StepLR

# Per-epoch history, consumed when plotting the learning curves.
train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []

epochs = 20

best_val_loss = float('inf')
best_val_acc = -1

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-2)
# The scheduler is stepped once per epoch (see the end of the loop), so the
# cosine period must be counted in epochs. Counting it in batches would leave
# the learning rate almost unchanged over the whole run.
lr_scheduler = CosineAnnealingLR(optimizer, T_max=epochs, eta_min=0)

for epoch in tqdm(range(epochs)):
    # ---- Training ----
    model.train()
    total_loss = 0.0
    train_correct = 0
    total_train_samples = 0

    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(images)
        labels = labels.long()  # CrossEntropyLoss expects integer class indices

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

        train_predicted = outputs.argmax(-1)
        train_correct += (train_predicted == labels).sum().item()
        total_train_samples += labels.size(0)

    # Loss is averaged over batches, accuracy over samples.
    avg_train_loss = total_loss / len(train_loader)
    train_accuracy = 100. * train_correct / total_train_samples

    # ---- Validation ----
    model.eval()
    total_val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            outputs = model(images)
            labels = labels.long()

            loss = criterion(outputs, labels)
            total_val_loss += loss.item()

            predicted = outputs.argmax(-1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    avg_val_loss = total_val_loss / len(val_loader)
    val_accuracy = 100. * correct / total

    # Learning rate update (once per epoch).
    lr_scheduler.step()

    # Track the best validation loss for reporting only.
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss

    # Checkpoint whenever validation accuracy strictly improves.
    if val_accuracy > best_val_acc:
        best_val_acc = val_accuracy
        torch.save(model.state_dict(), 'model_classification.pth')

    print(f'Epoch {epoch+1}/{epochs}, Train loss: {avg_train_loss:.4f}, Train acc: {train_accuracy:.4f}%, Val loss: {avg_val_loss:.4f}, Val acc: {val_accuracy:.4f}%, Best Val loss: {best_val_loss:.4f} Best Val acc: {best_val_acc:.2f}%')

    # Store performance
    train_losses.append(avg_train_loss)
    train_accuracies.append(train_accuracy)
    val_losses.append(avg_val_loss)
    val_accuracies.append(val_accuracy)


In [None]:
fig, ax = plt.subplots(1, 2, figsize=(15, 5))

# One panel per metric: (axes, training curve, validation curve, title, y-label).
panels = (
    (ax[0], train_accuracies, val_accuracies, 'Model Accuracy', 'Accuracy'),
    (ax[1], train_losses, val_losses, 'Model Loss', 'Loss'),
)

for panel_ax, train_curve, val_curve, panel_title, y_label in panels:
    # Training curve first, then validation, so the legend order matches.
    panel_ax.plot(train_curve)
    panel_ax.plot(val_curve)
    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel('Epochs')
    panel_ax.set_ylabel(y_label)
    panel_ax.legend(['Train', 'Val'])

plt.show()

In [None]:
# Restore the checkpoint written during training.
state_dict = torch.load('model_classification.pth')
model.load_state_dict(state_dict)

# Inference mode for layers such as dropout and batch norm.
model.eval()

test_correct = 0
test_total = 0

# The evaluation iterates val_loader; gradients are not needed here.
with torch.no_grad():
    for images, labels in val_loader:
        labels = labels.to(torch.long)

        outputs = model(images)

        # Predicted class = index of the largest logit along the last dim.
        predicted = torch.argmax(outputs, dim=-1)
        print(predicted)
        print(labels)

        test_correct += int((predicted == labels).sum())
        test_total += labels.shape[0]

print(f'Test accuracy is {100. * test_correct / test_total}%')