# 第六章：卷积神经网络
湖北理工学院《机器学习》课程资料

作者：李辉楚吴

笔记内容概述: 迁移学习、ResNet

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

# Label and tick-label font sizes.
label_size, ticklabel_size = 18, 14

# The only preprocessing step composed here is the image -> tensor conversion.
transform = transforms.Compose([transforms.ToTensor()])

# Both CIFAR-10 splits are read from ./Data with download=False, so nothing
# is fetched by this cell.
cifar_kwargs = dict(root='./Data', download=False, transform=transform)

# Test split
testset = torchvision.datasets.CIFAR10(train=False, **cifar_kwargs)
print(f"Test set size: {len(testset)}")

# Training split
trainset = torchvision.datasets.CIFAR10(train=True, **cifar_kwargs)
print(f"Training set size: {len(trainset)}")

from torch.utils.data import Subset

# Fraction of each class's usable samples that goes to the training split;
# the remainder goes to the cross-validation split.
tr_cv_rate = 0.8

# class_indices[c] collects the dataset index of every training sample whose
# label is c. CIFAR-10 has 10 classes.
num_classes = 10
class_indices = [[] for _ in range(num_classes)]

# Read the labels from the dataset's `targets` sequence when it exists:
# iterating `trainset` itself loads and transforms every image only to discard
# it. This is equivalent here because `trainset` is built without a
# target_transform. Datasets lacking `targets` fall back to plain iteration.
targets = getattr(trainset, 'targets', None)
if targets is None:
    targets = (label for _, label in trainset)
for idx, label in enumerate(targets):
    class_indices[int(label)].append(idx)

# Both splits are sized from the smallest class so that every class
# contributes the same number of samples to each split.
min_class_count = min(len(indices) for indices in class_indices)
train_size_per_class = int(tr_cv_rate * min_class_count)
val_size_per_class = min_class_count - train_size_per_class

# Samples are taken in dataset order (no shuffling): the first
# train_size_per_class indices of each class go to training, the next
# val_size_per_class go to cross-validation.
train_indices = []
val_indices = []
split_end = train_size_per_class + val_size_per_class
for indices in class_indices:
    train_indices.extend(indices[:train_size_per_class])
    val_indices.extend(indices[train_size_per_class:split_end])

# Wrap the index lists as datasets that view into `trainset`.
trX = Subset(trainset, train_indices)
cvX = Subset(trainset, val_indices)

print(f"Number of training samples: {len(trX)}")
print(f"Number of cross-validation samples: {len(cvX)}")

构建DataLoaders，准备训练模型

In [None]:
batch_size = 64


def one_hot_collate(batch, num_classes=10):
    """Collate (image, label) samples into a batch with one-hot labels.

    Args:
        batch: Sequence of samples where ``item[0]`` is an image tensor and
            ``item[1]`` is an integer class index. All image tensors must
            have the same shape so they can be stacked.
        num_classes: Width of the one-hot encoding. Defaults to 10 (the
            number of CIFAR-10 classes), so the function can still be passed
            directly as a DataLoader ``collate_fn``.

    Returns:
        A ``(data, one_hot_labels)`` tuple: ``data`` is the images stacked
        along a new leading batch dimension, and ``one_hot_labels`` is a
        float tensor of shape ``(len(batch), num_classes)`` with a single 1
        per row at the sample's class index.
    """
    data = torch.stack([item[0] for item in batch])
    labels = torch.tensor([item[1] for item in batch])
    one_hot_labels = torch.zeros(labels.size(0), num_classes)
    # Write a 1 into column labels[i] of row i, e.g. label 1 -> [0, 1, 0, ...].
    one_hot_labels.scatter_(1, labels.unsqueeze(1), 1)
    return data, one_hot_labels

# Options shared by all three loaders; only the training loader shuffles.
loader_options = dict(batch_size=batch_size, num_workers=0, collate_fn=one_hot_collate)
trLoader = torch.utils.data.DataLoader(trX, shuffle=True, **loader_options)
cvLoader = torch.utils.data.DataLoader(cvX, shuffle=False, **loader_options)
teLoader = torch.utils.data.DataLoader(testset, shuffle=False, **loader_options)

# Pull a single batch from the training loader for inspection.
dataiter = iter(trLoader)
data, labels = next(dataiter)

# The channel count is the leading dimension of one image in the batch.
first_image = data[0]
image_channels = first_image.numpy().shape[0]
print(f'image_channels is {image_channels}')
# One-hot label vector of the first sample.
print(labels[0])

# Class names of CIFAR-10, indexed by class id.
label_text = [
    'airplane', 'automobile', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
]

# Show the first image of the batch with its class name as the title.
plt.figure(figsize=(6, 6))
# Move the channel axis from first to last before handing the array to imshow.
channels_last = first_image.permute(1, 2, 0)
plt.imshow(channels_last.numpy())
plt.title(f'Label: {label_text[labels[0].argmax().item()]}')
plt.axis('off')
plt.show()

### 迁移ResNet微调FNN层

In [None]:
import torch.nn as nn

# 1. Load ResNet-18 with pretrained weights and print the stock architecture.
# NOTE(review): newer torchvision releases deprecate `pretrained=True` in
# favour of the `weights=` argument -- confirm against the installed version.
model = torchvision.models.resnet18(pretrained=True)
print(model)

# 2. Replace the first convolution with a 3x3, stride-1, padding-1 convolution
#    over 3 input channels. The replacement is a freshly constructed layer.
model.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1)

# 3. Replace the first max-pooling layer with an identity so the feature map
#    is not downsampled at this stage.
model.maxpool = nn.Identity()

# 4. Replace the output layer with a 10-way linear classifier that keeps the
#    original layer's input width.
fc_in_features = model.fc.in_features
model.fc = nn.Linear(fc_in_features, 10)

# Print the modified architecture.
print(model)

微调ResNet18

In [None]:
# Loss function. The loaders yield one-hot float label vectors, which are
# passed to CrossEntropyLoss as the target.
criterion = nn.CrossEntropyLoss()

# Adam with default settings over all model parameters (no parameters are
# frozen in the code shown here).
optimizer = torch.optim.Adam(model.parameters())

# Train the model, recording one training loss and one cross-validation loss
# per epoch.
num_epochs = 5
train_losses = []
cv_losses = []

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, labels in trLoader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        # Per-batch progress output.
        print(f'Epoch {epoch+1}/{num_epochs}, Training Loss: {loss.item():.4f}')

    # Record the mean loss over all training batches of this epoch. Recording
    # only the last batch's loss would be noisy and not comparable with the
    # cross-validation loss below, which is also a mean over batches.
    train_losses.append(running_loss / len(trLoader))

    # Cross-validation loss: mean of the per-batch losses, computed in eval
    # mode and without gradient tracking.
    model.eval()
    cv_loss = 0.0
    with torch.no_grad():
        for images, labels in cvLoader:
            outputs = model(images)
            loss = criterion(outputs, labels)
            cv_loss += loss.item()
    cv_losses.append(cv_loss / len(cvLoader))

    print(f'Epoch {epoch+1}/{num_epochs}, Training Loss: {train_losses[-1]:.4f}, Cross-Validation Loss: {cv_losses[-1]:.4f}')

# Save the trained weights.
# NOTE(review): the file name says "mnist" although the data loaded in this
# file is CIFAR-10; the name is left unchanged so that anything loading this
# path keeps working.
torch.save(model.state_dict(), 'mnist_resnet18_finetuned.pth')

计算识别精度，展示学习曲线

In [None]:
def _count_correct(net, loader):
    """Count correct predictions of `net` over every batch in `loader`.

    Each batch is expected to be an ``(images, labels)`` pair where `labels`
    is one-hot encoded along dimension 1; both the predicted and the true
    class are taken as the index of the maximum along that dimension.

    Returns:
        A ``(correct, total)`` tuple of integer counts.
    """
    correct = 0
    total = 0
    for images, labels in loader:
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        _, true_labels = torch.max(labels, 1)
        total += labels.size(0)
        correct += (predicted == true_labels).sum().item()
    return correct, total


# Calculate and print accuracies (in percent) for the training and
# cross-validation sets, in eval mode and without gradient tracking.
model.eval()
with torch.no_grad():
    # Training set accuracy
    tr_correct, tr_total = _count_correct(model, trLoader)
    tr_accuracy = 100 * tr_correct / tr_total

    # Cross-validation set accuracy
    cv_correct, cv_total = _count_correct(model, cvLoader)
    cv_accuracy = 100 * cv_correct / cv_total

print(f'Accuracy on training set: {tr_accuracy:.2f}%')
print(f'Accuracy on cross-validation set: {cv_accuracy:.2f}%')

# Learning curves: one point per epoch for each recorded loss series.
epochs = range(1, num_epochs + 1)
plt.figure(figsize=(10, 5))
for loss_series, series_name in (
    (train_losses, 'Training Loss'),
    (cv_losses, 'Cross-Validation Loss'),
):
    plt.plot(epochs, loss_series, label=series_name)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training and Cross-Validation Loss')
plt.legend()
plt.show()

计算测试精度

In [None]:
# Accuracy (in percent) on the test loader, in eval mode and without
# gradient tracking.
model.eval()
test_correct = 0
test_total = 0
with torch.no_grad():
    for images, labels in teLoader:
        outputs = model(images)
        # Predicted class: index of the largest output per sample.
        predicted = outputs.max(dim=1).indices
        # True class: index of the largest entry of the one-hot label.
        true_labels = labels.max(dim=1).indices
        test_total += labels.shape[0]
        test_correct += (predicted == true_labels).sum().item()
test_accuracy = 100 * test_correct / test_total
print(f'Accuracy on test set: {test_accuracy:.2f}%')