In [257]:
import torch.optim
import torchvision
from torch.utils.data import DataLoader
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter
from sklearn.metrics import precision_recall_fscore_support
import numpy as np

In [258]:
laring_rate = 0.0001
batch_size = 256
epoch = 70
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [259]:
train_dataset = torchvision.datasets.CIFAR10("./dataset",transform=torchvision.transforms.Compose([
    torchvision.transforms.RandomRotation((-90,90)),
    torchvision.transforms.ToTensor()]),train=True,download=True)
test_dataset = torchvision.datasets.CIFAR10("./dataset",transform=torchvision.transforms.ToTensor(),train=False,download=True)

Files already downloaded and verified
Files already downloaded and verified


In [260]:
print("训练集的长度为：{}".format(len(train_dataset)))
print("测试集的长度为：{}".format(len(test_dataset)))

训练集的长度为：50000
测试集的长度为：10000


In [261]:
train_dataloader = DataLoader(train_dataset,batch_size,shuffle=True,drop_last=True)
test_dataloader = DataLoader(test_dataset,batch_size,shuffle=True,drop_last=True)

CIFAR10网络结构改进详解
我将详细解释每个改进层的添加目的和意义，帮助您理解这些修改如何提升模型性能：
1. Batch Normalization (批归一化层)
​​添加位置​​：在每个卷积层之后、激活函数之前
​​目的和意义​​：
​​稳定训练过程​​：通过标准化每层的输入分布，减少内部协变量偏移(Internal Covariate Shift)
​​允许使用更高学习率​​：BN使网络对学习率的选择更鲁棒
​​减少对初始化的依赖​​：降低网络对权重初始化的敏感性
​​轻微的正则化效果​​：通过每个batch的统计量引入噪声
​​加速收敛​​：实验表明BN可以使训练速度提高10倍以上
​​为什么需要​​：在原始结构中，深层网络容易因输入分布变化而训练困难，BN解决了这个问题
2. ReLU激活函数
​​添加位置​​：在每个卷积层和全连接层之后
​​目的和意义​​：
​​引入非线性​​：使网络能够学习复杂模式（原始结构缺少激活函数，相当于线性变换）
​​解决梯度消失​​：相比Sigmoid/Tanh，ReLU在正区间梯度恒为1
​​稀疏激活​​：只有部分神经元被激活，提高计算效率
​​加速收敛​​：线性特性使梯度计算更简单高效
​​为什么需要​​：原始结构没有激活函数，网络只能表达线性关系，无法学习复杂特征
3. Dropout层
​​添加位置​​：在全连接层之间
​​目的和意义​​：
​​防止过拟合​​：随机丢弃部分神经元(50%和30%)，强制网络学习冗余表示
​​模型集成效果​​：每次前向传播相当于不同的子网络
​​减少神经元共适应​​：防止神经元过度依赖特定特征
​​提高泛化能力​​：在CIFAR10这种小数据集上尤为重要
​​为什么需要​​：原始结构没有正则化措施，容易在训练集上过拟合
4. 改进的全连接层结构
​​原始结构​​：64×4×4(1024维) → 64维 → 10维
​​改进结构​​：1024维 → 256维 → 128维 → 10维
​​目的和意义​​：
​​平缓降维​​：避免信息瓶颈，保留更多特征信息
​​增加模型容量​​：256和128维的中间层提供更多学习空间
​​分层特征提取​​：允许网络学习更抽象的高级特征
​​减少信息损失​​：1024→64的骤降可能导致重要特征丢失
​​为什么需要​​：原始结构降维过于激进，可能丢失重要分类信息
5. 分离特征提取和分类器
​​结构变化​​：将网络分为features和classifier两个模块
​​目的和意义​​：
​​模块化设计​​：提高代码可读性和可维护性
​​便于迁移学习​​：可单独替换分类器用于其他任务
​​清晰职责划分​​：卷积部分专注特征提取，全连接部分专注分类
​​调试更方便​​：可独立检查各模块输出
​​为什么需要​​：原始单一序列结构不利于扩展和调试

In [262]:
#实验表明明显拟合效果以及泛化能力不如下面改进过的网络
# class CIFAR10(nn.Module):
#     def __init__(self):
#         super(CIFAR10, self).__init__()
#         self.module = nn.Sequential(
#             nn.Conv2d(in_channels=3,out_channels=32,kernel_size=5,stride=1,padding=2),
#             nn.MaxPool2d(kernel_size=2),
#             nn.Conv2d(in_channels=32,out_channels=32,kernel_size=5,stride=1,padding=2),
#             nn.MaxPool2d(kernel_size=2),
#             nn.Conv2d(in_channels=32,out_channels=64,kernel_size=5,stride=1,padding=2),
#             nn.MaxPool2d(kernel_size=2),
#             nn.Flatten(),
#             nn.Linear(64*4*4,128),
#             nn.Linear(128,64),
#             nn.Linear(64,10)
#         )
#
#     def forward(self,x):
#         x = self.module(x)
#         return x
# 修改后的网络
class CIFAR10(nn.Module):
    def __init__(self):
        super(CIFAR10, self).__init__()
        self.features = nn.Sequential(
            # 输入: 3x32x32
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(32),#
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),  # 输出: 32x16x16

            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),  # 输出: 32x8x8

            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),  # 输出: 64x4x4
        )

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 4 * 4, 256),
            nn.ReLU(),
            nn.Dropout(0.5),

            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.3),

            nn.Linear(128, 10)
        )

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x

In [263]:
writer = SummaryWriter("./tensorboard/complate")

In [264]:
#创建网络模型
cifar = CIFAR10()
cifar.load_state_dict(torch.load("best_cifar10.pth"))#是否在以前的基础上再进行训练
cifar = cifar.to(device)
# if torch.cuda.is_available():
#     cifar = cifar.cuda()
#writer.add_graph(CIFAR10,input)

In [265]:
#损失函数
lose_fuc = nn.CrossEntropyLoss()
lose_fuc = lose_fuc.to(device)
# if torch.cuda.is_available():
#     lose_fuc = lose_fuc.cuda()

In [266]:
#优化器
optimizer = torch.optim.Adam(cifar.parameters(), lr=laring_rate, weight_decay=1e-4)

In [267]:
total_train_step = 0
total_test_step = 0
class_names = ('plane', 'car', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck')
for i in range(epoch):
    cifar.train()
    print("---------------------------------------------第 {} 轮训练开始-------------------------------------------------------".format(i+1))
    for data in train_dataloader:
        imgs , tag = data
        imgs = imgs.to(device)
        tag = tag.to(device)
        # if torch.cuda.is_available():
        #     imgs = imgs.cuda()
        #     tag = tag.cuda()
        outputs = cifar(imgs)
        lose = lose_fuc( outputs , tag)

        #开始反向传播，进行梯度下降
        optimizer.zero_grad()
        lose.backward()
        optimizer.step()

        total_train_step += 1
        if total_train_step % 100 == 0:
            print("训练次数：{}，loss:{}".format(total_train_step,lose))
            writer.add_scalar("train_lose",lose,total_train_step)

    # 测试步骤开始
    cifar.eval()
    total_test_lose = 0
    all_preds = []
    all_targets = []
    the_best_accuaray = 0

    with torch.no_grad():
        for data in test_dataloader:
            imgs, tags = data
            imgs = imgs.to(device)
            tags = tags.to(device)

            outputs = cifar(imgs)
            lose = lose_fuc(outputs, tags)
            total_test_lose += lose.item()

            # 收集预测和真实标签
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())#将所有的预测值都加入all_preds列表中
            all_targets.extend(tags.cpu().numpy())#将所有的真实值都加入all_targetss列表中

    # 转换为numpy数组
    all_preds = np.array(all_preds)
    all_targets = np.array(all_targets)

    # 计算整体准确率
    accuracy = (all_preds == all_targets).mean()

    # 计算每个类别的精确率、召回率和F1值
    try:
        # 尝试使用新版本的参数
        precision, recall, f1, _ = precision_recall_fscore_support(
            all_targets, all_preds, average=None, zero_division=0
        )
    except TypeError:
        # 如果出现错误，使用旧版本兼容方式
        precision, recall, f1, _ = precision_recall_fscore_support(
            all_targets, all_preds, average=None
        )
        # 手动处理除零错误
        precision = np.nan_to_num(precision, nan=0.0)
        recall = np.nan_to_num(recall, nan=0.0)
        f1 = np.nan_to_num(f1, nan=0.0)

    # 计算宏平均（macro-average）指标
    macro_precision = np.mean(precision)
    macro_recall = np.mean(recall)
    macro_f1 = np.mean(f1)

    # 打印结果
    print(f"整体测试集上的loss: {total_test_lose / len(test_dataloader):.4f}")
    print(f"整体测试集上的准确率: {accuracy:.4f}")
    print(f"宏平均精确率: {macro_precision:.4f}")
    print(f"宏平均召回率: {macro_recall:.4f}")
    print(f"宏平均F1值: {macro_f1:.4f}")

    # 记录到TensorBoard
    writer.add_scalar("test_loss", total_test_lose / len(test_dataloader), total_test_step)
    writer.add_scalar("test_accuracy", accuracy, total_test_step)
    writer.add_scalar("test_precision", macro_precision, total_test_step)
    writer.add_scalar("test_recall", macro_recall, total_test_step)
    writer.add_scalar("test_f1", macro_f1, total_test_step)

    # 记录每个类别的指标
    for i, class_name in enumerate(class_names):
        writer.add_scalar(f"class_{class_name}/precision", precision[i], total_test_step)
        writer.add_scalar(f"class_{class_name}/recall", recall[i], total_test_step)
        writer.add_scalar(f"class_{class_name}/f1", f1[i], total_test_step)

    total_test_step += 1
    if accuracy > the_best_accuaray:
        torch.save(cifar.state_dict(), "best_cifar10.pth")
        the_best_accuaray = accuracy
    torch.save(cifar.state_dict(), "last_cifar10.pth")
    print("模型已保存")

---------------------------------------------第 1 轮训练开始-------------------------------------------------------
训练次数：100，loss:0.9371758699417114
整体测试集上的loss: 0.6826
整体测试集上的准确率: 0.7673
宏平均精确率: 0.7655
宏平均召回率: 0.7673
宏平均F1值: 0.7654
模型已保存
---------------------------------------------第 2 轮训练开始-------------------------------------------------------
训练次数：200，loss:0.8451423645019531
训练次数：300，loss:0.7438347935676575
整体测试集上的loss: 0.6777
整体测试集上的准确率: 0.7700
宏平均精确率: 0.7675
宏平均召回率: 0.7700
宏平均F1值: 0.7680
模型已保存
---------------------------------------------第 3 轮训练开始-------------------------------------------------------
训练次数：400，loss:0.81822270154953
训练次数：500，loss:0.782566249370575
整体测试集上的loss: 0.6789
整体测试集上的准确率: 0.7686
宏平均精确率: 0.7655
宏平均召回率: 0.7686
宏平均F1值: 0.7663
模型已保存
---------------------------------------------第 4 轮训练开始-------------------------------------------------------
训练次数：600，loss:0.5952942371368408
训练次数：700，loss:0.6876346468925476
整体测试集上的loss: 0.6735
整体测试集上的准确率: 0.7720
宏平均精确率: 0.7690
宏平均召回率: 

KeyboardInterrupt: 

In [None]:
writer.close()

In [283]:
from PIL import Image

img_path = "./image/test/dog5.png"
img = Image.open(img_path)
img = img.convert('RGB')
print(img)

transform = torchvision.transforms.Compose([
    torchvision.transforms.Resize((32,32)),
    torchvision.transforms.ToTensor()
])

img = transform(img)
print(img)

model = CIFAR10()
model.load_state_dict(torch.load("best_cifar10.pth"))

img = torch.reshape(img,(1,3,32,32))

model.eval()
with torch.no_grad():
    output = model(img)
print(output)
result_list = ['airplane','automobile','bird','cat','deer','dog','frog','horse','ship','truck']
print(result_list[output.argmax(1)])

<PIL.Image.Image image mode=RGB size=137x147 at 0x19C807910C8>
tensor([[[0.5961, 0.6353, 0.6627,  ..., 0.5216, 0.5255, 0.5137],
         [0.5490, 0.5765, 0.6078,  ..., 0.4902, 0.4902, 0.4784],
         [0.4784, 0.4784, 0.4863,  ..., 0.4902, 0.4784, 0.4627],
         ...,
         [0.2902, 0.3255, 0.2588,  ..., 0.3608, 0.3255, 0.3529],
         [0.2941, 0.3294, 0.4471,  ..., 0.3804, 0.2824, 0.2980],
         [0.3098, 0.4745, 0.8157,  ..., 0.4275, 0.3451, 0.3686]],

        [[0.5647, 0.6118, 0.6392,  ..., 0.6784, 0.6784, 0.6745],
         [0.5294, 0.5608, 0.5922,  ..., 0.6549, 0.6549, 0.6471],
         [0.5686, 0.5725, 0.5843,  ..., 0.6510, 0.6471, 0.6392],
         ...,
         [0.4745, 0.5020, 0.4196,  ..., 0.5176, 0.4824, 0.5294],
         [0.4706, 0.4902, 0.5608,  ..., 0.5373, 0.4314, 0.4667],
         [0.4588, 0.5765, 0.8510,  ..., 0.5961, 0.5137, 0.5451]],

        [[0.4863, 0.5373, 0.5647,  ..., 0.3490, 0.3373, 0.3333],
         [0.4549, 0.4824, 0.5176,  ..., 0.3020, 0.3020, 0.29