In [None]:
from torch.utils.data import DataLoader
from torchvision.datasets import CIFAR10
import torchvision.transforms as transforms
import torch
# 下载数据集的路径
path = './DataSet'
# 使用的设备
device = torch.device("cuda")
# 数据预处理,转换为张量并归一化
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # 将图像大小调整为 224x224
    transforms.ToTensor(),  # 将图像转换为张量
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])  # 归一化处理

train_set = CIFAR10(root=path, train=True, download=True, transform=transform)
test_set = CIFAR10(root=path, train=False, download=True, transform=transform)
# 加载数据集并设置批次大小
train_loader = DataLoader(train_set, batch_size=128, shuffle=True)
test_loader = DataLoader(test_set, batch_size=128, shuffle=False)
# 修正 print 语句，明确输出内容
print(f"训练集的 batch 数量: {len(train_loader)}，每个 batch 中的数据数量: {train_loader.batch_size}")
print(f"测试集的 batch 数量: {len(test_loader)}，每个 batch 中的数据数量: {test_loader.batch_size}")

Files already downloaded and verified
Files already downloaded and verified
训练集的 batch 数量: 98，每个 batch 中的数据数量: 512
测试集的 batch 数量: 20，每个 batch 中的数据数量: 512


In [2]:
from torch.nn import Module
from torch.nn import Conv2d
from torch.nn import Linear
from torch.nn import MaxPool2d
from torch.nn import ReLU
from torch.nn import Flatten
from torch.nn import Dropout
from torch.nn import Sequential

from torch.nn.init import kaiming_uniform_
from torch.nn.init import xavier_uniform_


class AlexNet(Module):
    def __init__(self):
        super().__init__()
        # 定义特征提取部分
        self.feature = Sequential(
            # 卷积层 1：输入通道数为 3，输出通道数为 96，卷积核大小为 11x11，步幅为 4，填充为 1
            # 使用 Kaiming 初始化方法初始化卷积层 1 的权重，适用于 ReLU 激活函数
            # 池化层 1：使用 3x3 的最大池化，步长为 2
            Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4), padding=(1, 1)),
            ReLU(),
            MaxPool2d(kernel_size=(3, 3), stride=(2, 2)),
            # 卷积层 2：输入通道数为 96，输出通道数为 256，卷积核大小为 5x5，填充为 2
            # 使用 Kaiming 初始化方法初始化卷积层 2 的权重，适用于 ReLU 激活函数
            # 池化层 2：使用 3x3 的最大池化，步长为 2
            Conv2d(96, 256, kernel_size=(5, 5), padding=(2, 2)),
            ReLU(),
            MaxPool2d(kernel_size=(3, 3), stride=(2, 2)),
            # 卷积层 3：输入通道数为 256，输出通道数为 384，卷积核大小为 3x3，填充为 1
            # 使用 Kaiming 初始化方法初始化卷积层 3 的权重，适用于 ReLU 激活函数
            Conv2d(256, 384, kernel_size=(3, 3), padding=(1, 1)),
            ReLU(),
            # 卷积层 4：输入通道数为 384，输出通道数为 384，卷积核大小为 3x3，填充为 1
            # 使用 Kaiming 初始化方法初始化卷积层 4 的权重，适用于 ReLU 激活函数
            Conv2d(384, 384, kernel_size=(3, 3), padding=(1, 1)),
            ReLU(),
            # 卷积层 5：输入通道数为 384，输出通道数为 256，卷积核大小为 3x3，填充为 1
            # 使用 Kaiming 初始化方法初始化卷积层 5 的权重，适用于 ReLU 激活函数
            Conv2d(384, 256, kernel_size=(3, 3), padding=(1, 1)),
            ReLU(),
            # 做了三次卷积之后，才进行一次池化
            # 池化层 3：使用 3x3 的最大池化，步长为 2
            MaxPool2d(kernel_size=(3, 3), stride=(2, 2))
        )
        # 对特征提取部分的卷积层进行 Kaiming 初始化
        for layer in self.feature:
            if isinstance(layer, Conv2d):
                kaiming_uniform_(layer.weight, nonlinearity='relu')
        # 定义分类部分
        self.classifier = Sequential(
            # 添加 Flatten 层
            Flatten(),
            # 全连接层 1：输入维度为 6400，输出维度为 4096
            # 使用 Kaiming 初始化方法初始化全连接层 1 的权重，适用于 ReLU 激活函数，并且使用dropout
            Linear(6400, 4096),
            ReLU(),
            Dropout(p=0.5),
            # 全连接层 2：输入维度为 4096，输出维度为 4096
            # 使用 Kaiming 初始化方法初始化全连接层 2 的权重，适用于 ReLU 激活函数，并且使用dropout
            Linear(4096, 4096),
            ReLU(),
            Dropout(p=0.5),
            # 输出层：输入维度为 4096，输出维度为 10，对应 CIFAR-10 的 10 个类别
            Linear(4096, 10)
        )
        # 对分类器部分的全连接层进行 Kaiming 初始化
        for layer in self.classifier:
            if isinstance(layer, Linear):
                if layer is not self.classifier[-1]:
                    kaiming_uniform_(layer.weight, nonlinearity='relu')
                else:
                    xavier_uniform_(layer.weight)
    # 前向传播
    def forward(self, x):
        x = self.feature(x)
        x = self.classifier(x)
        return x

In [4]:
# 训练模型
# 定义损失函数和优化器
from torch.nn import CrossEntropyLoss
from torch.optim import SGD
import time

def train_model(train_loader, model):
    # 定义损失函数和优化器
    criterion = CrossEntropyLoss()
    optimizer = SGD(model.parameters(), lr=0.01, momentum=0.9)
    # 训练的轮数
    num_epochs = 10
    # 初始化 running_loss
    running_loss = 0.0
    # 记录训练开始时间
    start_time = time.time()
    # 遍历epoch
    for epoch in range(num_epochs):
        # 遍历训练数据
        for i, (inputs, labels) in enumerate(train_loader):
            # 将数据移动到GPU
            inputs = inputs.to(device)
            labels = labels.to(device)
            # 梯度清零
            optimizer.zero_grad()
            # 计算模型输出
            y_hat = model(inputs)
            # 计算损失
            loss = criterion(y_hat, labels)
            # 反向传播
            loss.backward()
            # 更新参数
            optimizer.step()
            running_loss += loss.item()
        # 打印每个 epoch 的损失
        print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {running_loss / len(train_loader)}')
        # 每个 epoch 结束后重置 running_loss
        running_loss = 0.0
    # 记录训练结束时间
    end_time = time.time()
    # 计算训练总时间
    total_time = end_time - start_time
    # 转换为分钟和秒
    minutes = int(total_time // 60)
    seconds = total_time % 60
    print(f'Training completed in {minutes} minutes and {seconds:.2f} seconds.')

In [5]:
# 评估模型
from sklearn.metrics import accuracy_score
from numpy import vstack
from numpy import argmax


def evaluate_model(test_loader, model):
    predictions, actuals = list(), list()
    for i, (inputs, labels) in enumerate(test_loader):
        # 将数据移动到GPU
        inputs = inputs.to(device)
        labels = labels.to(device)
        # 计算模型输出
        y_hat = model(inputs)
        # 转换为 numpy 数据类型
        y_hat = y_hat.detach().cpu().numpy()
        actual = labels.cpu().numpy()
        # 转换为类标签
        y_hat = argmax(y_hat, axis=1)
        # 为stack格式化
        actual = actual.reshape((len(actual), 1))
        y_hat = y_hat.reshape((len(y_hat), 1))
        # 存储
        predictions.append(y_hat)
        actuals.append(actual)
    predictions, actuals = vstack(predictions), vstack(actuals)
    # 计算准确率
    acc = accuracy_score(actuals, predictions)
    return acc

In [6]:
# 产生实例,并且将实例放入GPU
AlexNet_model = AlexNet()
AlexNet_model.to(device)
# 训练实例
train_model(train_loader, AlexNet_model)
# 评估实例整体准确率
acc = evaluate_model(test_loader, AlexNet_model)
print('Overall Accuracy: %.3f' % acc)

Epoch 1/10, Loss: 2.0321377369822287
Epoch 2/10, Loss: 1.470605987675336
Epoch 3/10, Loss: 1.2526810315190529
Epoch 4/10, Loss: 1.105035921140593
Epoch 5/10, Loss: 0.9831318691068766
Epoch 6/10, Loss: 0.8522634846823556
Epoch 7/10, Loss: 0.7738189052562324
Epoch 8/10, Loss: 0.7127545092787061
Epoch 9/10, Loss: 0.6578454721947106
Epoch 10/10, Loss: 0.590369524700301
Training completed in 7 minutes and 28.81 seconds.
Overall Accuracy: 0.744


In [7]:
# 保存训练好的模型
PATH = '../DataSet/model/AlexNet.pth'
torch.save(AlexNet_model.state_dict(), PATH)

In [8]:
AlexNet_model = AlexNet().to(device)
AlexNet_model.load_state_dict(torch.load(PATH, weights_only=True))
classes = ('plane', 'car', 'bird', 'cat', 'deer','dog', 'frog', 'horse', 'ship', 'truck')
# 初始化分类准确率统计
correct_pred = {classname: 0 for classname in classes}
total_pred = {classname: 0 for classname in classes}

# 评估每个类别的准确率
with torch.no_grad():
    AlexNet_model.eval()  # 确保模型处于评估模式
    for data in test_loader:
        images, labels = data
        # 将数据移动到GPU
        images, labels = images.to(device), labels.to(device)
        outputs = AlexNet_model(images)  # 使用实例化模型
        _, predictions = torch.max(outputs, 1)

        # 将预测结果和标签移动到CPU并转换为numpy数组
        labels = labels.cpu().numpy()
        predictions = predictions.cpu().numpy()

        for label_idx, pred_idx in zip(labels, predictions):
            class_name = classes[label_idx]
            if label_idx == pred_idx:
                correct_pred[class_name] += 1
            total_pred[class_name] += 1

# 打印每个类别的准确率
for classname in classes:
    total = total_pred[classname]
    correct = correct_pred[classname]
    if total == 0:
        print(f"Class {classname} has no test samples")
        continue
    accuracy = 100 * correct / total
    print(f'Accuracy for {classname:5s}: {accuracy:.1f}%')

Accuracy for plane: 88.4%
Accuracy for car  : 92.8%
Accuracy for bird : 62.3%
Accuracy for cat  : 58.7%
Accuracy for deer : 81.5%
Accuracy for dog  : 59.6%
Accuracy for frog : 82.2%
Accuracy for horse: 73.1%
Accuracy for ship : 80.6%
Accuracy for truck: 81.3%
