# 实验九：基于CNN的图像识别
湖北理工学院《机器学习》课程NoteBook

学生：吴兴平

笔记内容概述: 卷积神经网络、手写字母识别

In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

label_size = 18 # Label size
ticklabel_size = 14 # Tick label size
    
# Define a transform to normalize the data
transform = transforms.Compose([
    transforms.ToTensor()
])

# Load test data from the MNIST
testset = torchvision.datasets.MNIST(root='./Data', train=False, download=False, transform=transform)
print(f"Test set size: {len(testset)}")

# Load training data from the MNIST
trainset = torchvision.datasets.MNIST(root='./Data', train=True, download=False, transform=transform)
print(f"Training set size: {len(trainset)}")

# Rate of trX and cvX
tr_cv_rate = 0.8

# Create a list to store indices for each class unique()
class_indices = [[] for _ in range(10)]  # 10 classes in MNIST

# Populate class_indices
for idx, (_, label) in enumerate(trainset):
    class_indices[label].append(idx)

# Calculate the number of samples for each class in training and validation sets
train_size_per_class = int(tr_cv_rate * min(len(indices) for indices in class_indices))
val_size_per_class = min(len(indices) for indices in class_indices) - train_size_per_class

# Create balanced train and validation sets
train_indices = []
val_indices = []
for indices in class_indices:
    train_indices.extend(indices[:train_size_per_class])
    val_indices.extend(indices[train_size_per_class:train_size_per_class + val_size_per_class])

# Create Subset datasets
from torch.utils.data import Subset
trX = Subset(trainset, train_indices)
cvX = Subset(trainset, val_indices)

print(f"Number of training samples: {len(trX)}")
print(f"Number of cross-validation samples: {len(cvX)}")

Test set size: 10000
Training set size: 60000
Number of training samples: 43360
Number of cross-validation samples: 10850


构建DataLoaders，准备训练模型

In [3]:
batch_size = 42 # Define training batch 1，

def one_hot_collate(batch):
    data = torch.stack([item[0] for item in batch])
    labels = torch.tensor([item[1] for item in batch])
    one_hot_labels = torch.zeros(labels.size(0), 10)  # 10 classes in MNIST 【0，1，0，0】
    one_hot_labels.scatter_(1, labels.unsqueeze(1), 1)
    return data, one_hot_labels

trLoader = torch.utils.data.DataLoader(trX, batch_size=batch_size, shuffle=True, num_workers=0, collate_fn=one_hot_collate)
cvLoader = torch.utils.data.DataLoader(cvX, batch_size=batch_size, shuffle=False, num_workers=0, collate_fn=one_hot_collate)
teLoader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=0, collate_fn=one_hot_collate)

# Get a batch of training data
dataiter = iter(trLoader)
data, labels = next(dataiter)

image_channels = data[0].numpy().shape[0]
print(f'image_channels is {image_channels}')
print(labels)

image_channels is 1
tensor([[0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
        [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
        [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
        [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
        [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0

### 定义并训练卷积神经网络
* 输入：2-D图片
* 输出：手写字母类型的概率分布
* 卷积层：2层, 卷积核大小：3x3
* 隐藏层1：100个节点
* 隐藏层2：50个节点

In [5]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class CNN(nn.Module):
    def __init__(self, image_channels, num_classes):
        super(CNN, self).__init__()
        
        # First convolutional layer
        self.conv1 = nn.Conv2d(in_channels=image_channels, out_channels=32, kernel_size=3, padding=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        # Second convolutional layer
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        # Fully connected layers
        self.fc1 = nn.Linear(64 * 7 * 7, 100)  # After two 2x2 max pools, 28x28 -> 7x7
        self.fc2 = nn.Linear(100, num_classes)  # 10 classes output

        # Softmax
        self.softmax = nn.Softmax(dim=1)
        
    def forward(self, x):
        # Remove the reshape operation and directly use x
        x = F.relu(self.conv1(x))
        x = self.pool1(x)
        
        # Second conv layer
        x = F.relu(self.conv2(x))
        x = self.pool2(x)
        
        # Flatten the output for the fully connected layers
        x = x.view(-1, 64 * 7 * 7)
        
        # Fully connected layers
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        
        # Softmax
        x = self.softmax(x)
        
        return x

# Initialize the model
model = CNN(image_channels, 10)
# 检查CUDA是否可用
if torch.cuda.is_available():
    model = model.cuda()
    print("CUDA is available. Model is moved to GPU.")
else:
    print("CUDA is not available. Model will run on CPU.")

# Display model architecture
print(model)

CUDA is not available. Model will run on CPU.
CNN(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=3136, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=10, bias=True)
  (softmax): Softmax(dim=1)
)


使用Adam作为Optimizor训练模型

In [7]:
# Define loss function and optimizer
criterion = nn.CrossEntropyLoss() # Loss
optimizer = torch.optim.Adam(model.parameters()) # Adam

# Lists to store losses
train_losses = []
cv_losses = []

# Number of epochs
num_epochs = 50

for epoch in range(num_epochs):
    model.train()
    batch_losses = []
    
    for batch_x, batch_y in trLoader:
        # Forward pass
        outputs = model(batch_x)
        loss = criterion(outputs, batch_y)
        
        # Backward pass and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        batch_losses.append(loss.item())
    
    # Calculate average training loss for this epoch
    avg_train_loss = sum(batch_losses) / len(batch_losses)
    train_losses.append(avg_train_loss)
    
    # Evaluate on cross-validation set
    model.eval()
    cv_batch_losses = []
    with torch.no_grad():
        for cv_x, cv_y in cvLoader:
            cv_outputs = model(cv_x)
            cv_loss = criterion(cv_outputs, cv_y)
            cv_batch_losses.append(cv_loss.item())
    
    avg_cv_loss = sum(cv_batch_losses) / len(cv_batch_losses)
    cv_losses.append(avg_cv_loss)
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {avg_train_loss:.4f}, CV Loss: {avg_cv_loss:.4f}')

Epoch [1/50], Train Loss: 1.6525, CV Loss: 1.5961
Epoch [2/50], Train Loss: 1.5448, CV Loss: 1.4927
Epoch [3/50], Train Loss: 1.4845, CV Loss: 1.4921
Epoch [4/50], Train Loss: 1.4796, CV Loss: 1.4846
Epoch [5/50], Train Loss: 1.4764, CV Loss: 1.4811
Epoch [6/50], Train Loss: 1.4741, CV Loss: 1.4845
Epoch [7/50], Train Loss: 1.4732, CV Loss: 1.4811
Epoch [8/50], Train Loss: 1.4717, CV Loss: 1.4778
Epoch [9/50], Train Loss: 1.4703, CV Loss: 1.4802
Epoch [10/50], Train Loss: 1.4705, CV Loss: 1.4803
Epoch [11/50], Train Loss: 1.4696, CV Loss: 1.4770
Epoch [12/50], Train Loss: 1.4700, CV Loss: 1.4790
Epoch [13/50], Train Loss: 1.4691, CV Loss: 1.4790
Epoch [14/50], Train Loss: 1.4686, CV Loss: 1.4762
Epoch [15/50], Train Loss: 1.4682, CV Loss: 1.4768
Epoch [16/50], Train Loss: 1.4677, CV Loss: 1.4766
Epoch [17/50], Train Loss: 1.4676, CV Loss: 1.4786
Epoch [18/50], Train Loss: 1.4677, CV Loss: 1.4794
Epoch [19/50], Train Loss: 1.4667, CV Loss: 1.4762
Epoch [20/50], Train Loss: 1.4672, CV Lo

计算识别精度，展示学习曲线

In [None]:
# Calculate and print accuracies for training and cross-validation sets
model.eval()
with torch.no_grad():
    # Training set accuracy
    tr_correct = 0
    tr_total = 0
    for images, labels in trLoader:
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        _, true_labels = torch.max(labels, 1)
        tr_total += labels.size(0)
        tr_correct += (predicted == true_labels).sum().item()
    
    tr_accuracy = 100 * tr_correct / tr_total
    
    # Test set accuracy
    cv_correct = 0
    cv_total = 0
    for images, labels in cvLoader:
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        _, true_labels = torch.max(labels, 1)
        cv_total += labels.size(0)
        cv_correct += (predicted == true_labels).sum().item()
    
    cv_accuracy = 100 * cv_correct / cv_total

print(f'Accuracy on training set: {tr_accuracy:.2f}%')
print(f'Accuracy on cross-validation set: {cv_accuracy:.2f}%')

# Plot training and cross-validation losses
plt.figure(figsize=(10, 5))
plt.plot(range(1, num_epochs+1), train_losses, label='Training Loss')
plt.plot(range(1, num_epochs+1), cv_losses, label='Cross-Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training and Cross-Validation Loss')
plt.legend()
plt.show()

Accuracy on training set: 99.67%
Accuracy on cross-validation set: 98.76%
