## ResNet34

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import classification_report, precision_score, recall_score, f1_score, accuracy_score
import cv2
import numpy as np
import os
import torch.nn.functional as F
from PIL import Image

In [2]:
class ImageDataset(Dataset):
    """Image-classification dataset driven by a plain-text index file.

    Every non-blank line of the index file holds an image path followed by
    an integer class label, separated by whitespace.

    Attributes set by ``__init__``:
        img_labels: list of ``[path, label]`` string pairs, in file order.
        transform:  optional callable applied to each loaded image.
        labels:     integer label of every sample, in file order.
        classes:    sorted list of the distinct labels found in the file.
    """

    def __init__(self, file_path, transform=None):
        """Parse the index file.

        Args:
            file_path: path of the text file listing ``<image path> <label>``.
            transform: optional callable applied to the RGB image array.

        Raises:
            ValueError: if a non-blank line lacks a path/label pair or the
                label is not an integer.
        """
        with open(file_path, 'r') as f:
            # Blank lines (e.g. a trailing empty line) carry no sample; skip them.
            rows = [line.strip() for line in f if line.strip()]

        self.img_labels = []
        self.labels = []
        for row in rows:
            # Split on the LAST whitespace run so paths containing spaces survive.
            fields = row.rsplit(None, 1)
            if len(fields) != 2:
                raise ValueError(f"Malformed line in {file_path!r}: {row!r}")
            self.img_labels.append(fields)
            self.labels.append(int(fields[1]))

        self.transform = transform
        # Sorted so the class order is deterministic (a bare set has no defined order).
        self.classes = sorted(set(self.labels))

    def __len__(self):
        """Return the number of indexed samples."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        The image is read with OpenCV, converted from BGR to RGB, and passed
        through ``transform`` when one was given. ``label`` is an ``int``.

        Raises:
            FileNotFoundError: if OpenCV cannot read the image file.
        """
        img_path, label = self.img_labels[idx]
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {img_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        label = int(label)

        if self.transform:
            image = self.transform(image)

        return image, label



In [3]:
# Residual block definition
class ResidualBlock(nn.Module):
    """Residual unit: two 3x3 conv + batch-norm layers plus a skip connection.

    Args:
        in_channel: number of input channels.
        out_channel: number of output channels.
        stride: stride of the first convolution (default 1).
        shortcut: optional module applied to the input before it is added to
            the main-path output; when ``None`` the input is added unchanged.
    """

    def __init__(self, in_channel, out_channel, stride=1, shortcut=None):
        super().__init__()
        main_path = [
            nn.Conv2d(in_channel, out_channel, kernel_size=3, stride=stride,
                      padding=1, bias=False),
            nn.BatchNorm2d(out_channel),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channel, out_channel, kernel_size=3, stride=1,
                      padding=1, bias=False),
            nn.BatchNorm2d(out_channel),
        ]
        # Attribute names stay `left` / `right`: they determine state_dict keys.
        self.left = nn.Sequential(*main_path)
        self.right = shortcut

    def forward(self, x):
        """Return ``relu(main_path(x) + skip(x))``."""
        out = self.left(x)
        if self.right is None:
            skip = x
        else:
            skip = self.right(x)
        out += skip
        return F.relu(out)

# ResNet network definition
class ResNet(nn.Module):
    """ResNet-34-style classifier.

    Structure: a 7x7 conv stem with max-pooling, four residual stages of
    3 / 4 / 6 / 3 ``ResidualBlock`` units (64 -> 128 -> 256 -> 512 channels),
    global average pooling and a linear classification head.

    Args:
        num_classes: size of the output logit vector.
    """

    def __init__(self, num_classes):
        super(ResNet, self).__init__()
        # Stem: 7x7 stride-2 conv followed by 3x3 stride-2 max-pool.
        self.pre_layer = nn.Sequential(
            nn.Conv2d(3, 64, 7, 2, 3, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(3, 2, 1)
        )
        self.layer1 = self._make_layer(64, 64, 3)
        self.layer2 = self._make_layer(64, 128, 4, stride=2)
        self.layer3 = self._make_layer(128, 256, 6, stride=2)
        self.layer4 = self._make_layer(256, 512, 3, stride=2)
        self.fc = nn.Linear(512, num_classes)

    def _make_layer(self, in_channel, out_channel, block_num, stride=1):
        """Build one stage made of ``block_num`` residual blocks.

        The first block changes channels/stride and always gets a
        1x1 conv + batch-norm projection shortcut (also when ``in_channel ==
        out_channel`` and ``stride == 1``, as for ``layer1``). The remaining
        blocks use identity shortcuts. The module layout is left as-is because
        it determines the parameter names stored in saved state dicts.
        """
        shortcut = nn.Sequential(
            nn.Conv2d(in_channel, out_channel, 1, stride, bias=False),
            nn.BatchNorm2d(out_channel)
        )
        layers = [ResidualBlock(in_channel, out_channel, stride, shortcut)]
        for _ in range(1, block_num):
            layers.append(ResidualBlock(out_channel, out_channel))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``."""
        x = self.pre_layer(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        # Global average pooling. The adaptive form reduces any H x W map to
        # 1 x 1; a square kernel sized from the width alone breaks on
        # non-square feature maps.
        x = F.adaptive_avg_pool2d(x, 1)
        x = x.flatten(1)
        return self.fc(x)

In [4]:
# Data preprocessing: the same pipeline is applied to every split
IMAGE_SIZE = (224, 224)
NORMALIZE_MEAN = [0.485, 0.456, 0.406]
NORMALIZE_STD = [0.229, 0.224, 0.225]
BATCH_SIZE = 32

preprocessing_steps = [
    transforms.ToPILImage(),  # make sure the numpy array becomes a PIL Image
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORMALIZE_MEAN, std=NORMALIZE_STD),
]
transform = transforms.Compose(preprocessing_steps)

# Datasets: one index file per split
train_dataset = ImageDataset(file_path='train.txt', transform=transform)
val_dataset = ImageDataset(file_path='val.txt', transform=transform)
test_dataset = ImageDataset(file_path='test.txt', transform=transform)

# Data loaders: only the training split is shuffled
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [5]:
# ResNet model, loss function and optimizer
LEARNING_RATE = 0.001

# One output logit per distinct label found in the training index file.
resnet34 = ResNet(num_classes=len(train_dataset.classes))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(resnet34.parameters(), lr=LEARNING_RATE)

In [6]:
# Helper that saves the training state
def save_checkpoint(epoch, model, optimizer, loss, path="checkpoint.pth"):
    """Write a training checkpoint to ``path`` with ``torch.save``.

    The stored dictionary has four entries: the epoch number, the model's
    state dict, the optimizer's state dict and the loss value.

    Args:
        epoch: epoch number to record.
        model: module whose ``state_dict()`` is stored.
        optimizer: optimizer whose ``state_dict()`` is stored.
        loss: loss value to record.
        path: destination file (default ``"checkpoint.pth"``).
    """
    state = {
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': loss,
    }
    torch.save(state, path)

# Helper that restores the training state
def load_checkpoint(path="checkpoint.pth", model=None, optimizer=None):
    """Restore model and optimizer state from a checkpoint file.

    Args:
        path: checkpoint file written by ``save_checkpoint``.
        model: module to restore; defaults to the notebook-level ``resnet34``.
        optimizer: optimizer to restore; defaults to the notebook-level
            ``optimizer``.

    Returns:
        ``(epoch, loss)`` as stored in the checkpoint.
    """
    if model is None:
        model = resnet34
    if optimizer is None:
        # The parameter shadows the module-level name, so look it up explicitly.
        optimizer = globals()["optimizer"]

    # Map tensors onto the device the model lives on, so a checkpoint saved
    # on a GPU can still be loaded on a CPU-only machine.
    first_param = next(model.parameters(), None)
    map_location = first_param.device if first_param is not None else "cpu"

    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    checkpoint = torch.load(path, map_location=map_location)
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    start_epoch = checkpoint['epoch']
    loss = checkpoint['loss']
    return start_epoch, loss

In [7]:
# Training loop
def train_model(num_epochs, start_epoch=0):
    """Train the notebook-level ``resnet34`` and validate after every epoch.

    Uses the module-level ``resnet34``, ``optimizer``, ``criterion``,
    ``train_loader`` and ``val_loader``. After each epoch the mean training
    loss is printed, a checkpoint is written with ``save_checkpoint`` and the
    validation accuracy is printed.

    Args:
        num_epochs: epoch index at which training stops (exclusive upper
            bound of the epoch range).
        start_epoch: first epoch index to run; pass the epoch stored in a
            checkpoint to resume. Nothing runs when it is >= ``num_epochs``.
    """
    # Run batches on whatever device the model lives on (CPU unless it was moved).
    device = next(resnet34.parameters()).device
    num_batches = len(train_loader)

    for epoch in range(start_epoch, num_epochs):
        resnet34.train()
        running_loss = 0.0
        for images, labels in train_loader:
            images = images.to(device=device, dtype=torch.float32)
            labels = labels.to(device=device, dtype=torch.int64)
            optimizer.zero_grad()
            outputs = resnet34(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        # An empty training loader would otherwise divide by zero.
        epoch_loss = running_loss / num_batches if num_batches else float('nan')
        print(f'Epoch {epoch + 1}, Loss: {epoch_loss}')

        # Save the training state; epoch + 1 is the epoch to resume from.
        save_checkpoint(epoch + 1, resnet34, optimizer, epoch_loss)

        # Validation phase
        resnet34.eval()
        total = 0
        correct = 0
        with torch.no_grad():
            for images, labels in val_loader:
                images = images.to(device=device, dtype=torch.float32)
                labels = labels.to(device=device, dtype=torch.int64)
                outputs = resnet34(images)
                # No `.data` needed: gradients are already disabled here.
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        if total:
            print(f'Validation Accuracy: {100 * correct / total}%')
        else:
            # An empty validation loader would otherwise divide by zero.
            print('Validation Accuracy: n/a (validation set is empty)')


In [8]:
# Resume from a saved checkpoint when one exists; otherwise start at epoch 0
checkpoint_path = "checkpoint.pth"
start_epoch = 0
if os.path.exists(checkpoint_path):
    start_epoch, _ = load_checkpoint(checkpoint_path)
    print(f"Resuming training from epoch {start_epoch}")

Resuming training from epoch 50


In [9]:
# Model evaluation
def evaluate_model(loader):
    """Evaluate the notebook-level ``resnet34`` on ``loader`` and print metrics.

    Prints accuracy, macro-averaged precision / recall / F1 and a per-class
    classification report. Class rows are taken from the module-level
    ``train_dataset.classes``.

    Args:
        loader: iterable yielding ``(images, labels)`` batches.
    """
    # Run batches on whatever device the model lives on (CPU unless it was moved).
    device = next(resnet34.parameters()).device
    resnet34.eval()
    all_preds, all_labels = [], []

    with torch.no_grad():
        for images, labels in loader:
            images = images.to(device=device, dtype=torch.float32)
            labels = labels.to(device=device, dtype=torch.int64)
            outputs = resnet34(images)
            _, predicted = torch.max(outputs, 1)
            all_preds.extend(predicted.cpu().tolist())
            all_labels.extend(labels.cpu().tolist())

    accuracy = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, average='macro')
    recall = recall_score(all_labels, all_preds, average='macro')
    f1 = f1_score(all_labels, all_preds, average='macro')

    print(f'Accuracy: {accuracy:.4f}')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'F1 Score: {f1:.4f}')

    # Pass `labels` explicitly so each name is tied to its class id. Without
    # it the report raises when a class is missing from this split, and the
    # names can be misaligned if the class list is not sorted.
    class_ids = sorted(train_dataset.classes)
    print(classification_report(
        all_labels,
        all_preds,
        labels=class_ids,
        target_names=[str(cls) for cls in class_ids],
    ))


In [10]:
# Train the model, starting from `start_epoch`
NUM_EPOCHS = 50
train_model(NUM_EPOCHS, start_epoch=start_epoch)

In [11]:
# Run the evaluation on the test set
evaluate_model(loader=test_loader)

Accuracy: 0.5156
Precision: 0.5611
Recall: 0.5156
F1 Score: 0.5119
              precision    recall  f1-score   support

           0       0.50      0.22      0.31         9
           1       0.50      0.78      0.61         9
           2       0.43      0.33      0.38         9
           3       0.71      0.56      0.62         9
           4       0.45      0.56      0.50         9
           5       0.53      0.89      0.67         9
           6       0.67      0.67      0.67         9
           7       1.00      0.22      0.36         9
           8       0.33      0.56      0.42         9
           9       0.25      0.33      0.29         9
          10       0.40      0.44      0.42         9
          11       0.00      0.00      0.00         9
          12       0.86      0.67      0.75         9
          13       0.31      0.56      0.40         9
          14       0.38      0.33      0.35         9
          15       0.71      0.56      0.62         9
          16  