In [80]:
import numpy as np
import torch
from sklearn.model_selection import train_test_split
import torch
from matplotlib import pyplot as plt 
import random
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, TensorDataset, Subset, SubsetRandomSampler
import os
import json
import torchvision 
import torchvision.transforms as transforms
from torch.utils.data import random_split
import torch.nn.functional as F
from sklearn.metrics import accuracy_score
from tqdm.auto import tqdm
from sklearn.model_selection import KFold
import pandas as pd
from time import time
from numpy.lib.stride_tricks import as_strided
from torchvision.datasets import ImageFolder

# 数据处理

# 车辆分类数据集

In [86]:
class_name = [
    "bus",
    "car",
    "truck"
]
PATH_CAR="./车辆分类数据集/车辆分类数据集"
# 变成dataset
transform = transforms.Compose([
    transforms.ToTensor()  # 将PIL图像转为Tensor [C, H, W]
])
dataset_car=ImageFolder(PATH_CAR, transform=transform)
image, label=dataset_car[0]
print(image.shape)

def split_dataset(root_path, test_size=0.2, random_seed=42):
    dataset_car=ImageFolder(PATH_CAR)
    # 获取每个类别的样本索引
    class_idx={}
    for idx, (_, label) in enumerate(dataset_car.imgs):
        if label not in class_idx:
            class_idx[label]=[]
        class_idx[label].append(idx)
    
    # 按类别划分训练集和测试集
    train_indices, test_indices = [], []
    for label, indices in class_idx.items():
        idx_train, idx_test = train_test_split(
            indices, 
            test_size=test_size, 
            random_state=random_seed,
            shuffle=True
        )
        train_indices.extend(idx_train)
        test_indices.extend(idx_test)
    return train_indices, test_indices


torch.Size([3, 120, 85])


In [89]:
def compute_mean_std(dataset_path, img_size=(224, 224), batch_size=32):
    """
    计算图像数据集的均值和标准差
    :param dataset_path: 数据集路径（需为ImageFolder格式）
    :param img_size: 统一调整的图像尺寸
    :param batch_size: 批量处理大小
    :return: (mean, std) 各通道的均值和标准差
    """
    # 预处理：调整尺寸 + 转为Tensor
    transform = transforms.Compose([
        transforms.Resize(img_size),
        transforms.ToTensor()  # 自动缩放到[0,1]
    ])
    
    # 加载数据集
    dataset = ImageFolder(dataset_path, transform=transform)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
    
    # 初始化统计变量
    mean = 0.
    std = 0.
    nb_samples = 0.
    
    # 遍历数据集计算
    for data, _ in tqdm(loader):
        batch_samples = data.size(0)
        data = data.view(batch_samples, data.size(1), -1)  # 展平H,W维度
        mean += data.mean(2).sum(0)  # 各通道均值求和
        std += data.std(2).sum(0)    # 各通道标准差求和
        nb_samples += batch_samples
    
    # 计算全局均值和标准差
    mean /= nb_samples
    std /= nb_samples
    return mean.tolist(), std.tolist()

mean, std = compute_mean_std(PATH_CAR)
print(f"均值: {mean}")  # 例如 [0.485, 0.456, 0.406]
print(f"标准差: {std}")  # 例如 [0.229, 0.224, 0.225]

  0%|          | 0/43 [00:00<?, ?it/s]

均值: [0.4101674556732178, 0.4220322370529175, 0.4358880817890167]
标准差: [0.19289539754390717, 0.19039973616600037, 0.1875486969947815]


In [90]:
# 数据预处理
transform = transforms.Compose([
    transforms.Resize((80, 100)),
    transforms.RandomRotation(5), 
    transforms.ToTensor(),   #转为Tensor并自动缩放到0，1
    transforms.Lambda(lambda x: x + torch.randn_like(x) * 0.005),  # 提前加噪声
    transforms.Normalize(mean=[0.410, 0.422, 0.435], std=[0.193, 0.190, 0.187]),
])

# 加载完整数据集（应用预处理）
dataset = ImageFolder(root=PATH_CAR, transform=transform)

# 划分训练集和测试集（测试集占20%）
train_idx, test_idx = split_dataset(PATH_CAR, test_size=0.2)

# 创建SubsetRandomSampler（自动打乱）
train_sampler = SubsetRandomSampler(train_idx)
test_sampler = SubsetRandomSampler(test_idx)

# 创建DataLoader
train_loader = DataLoader(dataset, batch_size=32, sampler=train_sampler)
test_loader = DataLoader(dataset, batch_size=32, sampler=test_sampler)

In [92]:
print(train_loader.dataset)
for data, _ in train_loader:
    print(data.shape)
    break

Dataset ImageFolder
    Number of datapoints: 1357
    Root location: ./车辆分类数据集/车辆分类数据集
    StandardTransform
Transform: Compose(
               Resize(size=(80, 100), interpolation=bilinear, max_size=None, antialias=True)
               ToTensor()
               Normalize(mean=[0.41, 0.422, 0.435], std=[0.193, 0.19, 0.187])
           )
torch.Size([32, 3, 80, 100])


# 手写二维卷积

In [45]:
# 根据im2col变为矩阵乘法
def split_by_strides_np(X, kh, kw, s):
    N, C, H, W = X.shape
    oh = (H - kh) // s + 1
    ow = (W - kw) // s + 1
    print(X.strides)
    strides = (*X.strides[:-2], X.strides[-2]*s, X.strides[-1]*s, *X.strides[-2:])
    print(strides)
    A = as_strided(X, shape=(N,C,oh,ow,kh,kw), strides=strides)
    return A

np.tensordot可以指定对应的轴相乘，相乘完的轴就可以认为没了，然后将剩下的轴拼在一起
在使用的时候只要弄懂每个轴是什么物理意义，然后知道想要输出的形状是什么样子的就行了

例如这里A.shape(N,C,oh,ow,kh,kw),kernel.shape(n,c,kh,kw)我想要输出的样子是(N, n, oh, ow)所以axes[(1,4,5), (1,2,3)]

将C，c；kh，kh；kw，kw相乘，拼接成了(N,oh,ow,n)的形状，然后使用transpose调整一下位置即可

In [46]:
def conv_np(X, kernel, stride=1, padding=0):
    n, c, kh, kw = kernel.shape
    A =split_by_strides_np(X, kh, kw, stride)
    res=np.tensordot(A, kernel, axes=[(1,4,5), (1,2,3)])
    res = res.transpose((0,3,1,2))
    return  res

In [47]:
X1 = np.arange(1,19, dtype=np.int32).reshape(1, 2,3,3)
kernel = np.arange(1,9, dtype=np.int32).reshape(1, 2,2,2)
res = conv_np(X1, kernel)
print(res)

(72, 36, 12, 4)
(72, 36, 12, 4, 12, 4)


TypeError: as_strided(): argument 'input' (position 1) must be Tensor, not numpy.ndarray

In [55]:
from torch import as_strided

def split_by_strides(X, kh, kw, s):
    N, C, H, W = X.shape
    oh = (H - kh) // s + 1
    ow = (W - kw) // s + 1
    
    # 计算PyTorch张量的stride
    stride = list(X.stride())
    strides = (stride[0], stride[1], 
               stride[2]*s, stride[3]*s, 
               stride[2], stride[3])
    
    A = as_strided(X, size=(N, C, oh, ow, kh, kw), 
                  stride=strides)
    return A

def conv(X, kernel, stride=1, padding=0):
    if padding > 0:
        X = F.pad(X, (padding, padding, padding, padding))
    
    n, c, kh, kw = kernel.shape
    A = split_by_strides(X, kh, kw, stride)
    
    # 使用einsum实现张量收缩
    res = torch.einsum('nchwkl,ockl->nohw', A, kernel)
    return res

In [64]:
X1 = torch.randn((1, 2, 3, 3))
kernel = torch.randn((2, 2, 2, 2))
print(conv(X1, kernel))

tensor([[[[-10.4530,  -4.5276],
          [  4.6923,   5.0751]],

         [[  8.4299,   1.3304],
          [  5.3363,   7.8171]]]])


In [57]:
class MyConv2D(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0):
        super(MyConv2D, self).__init__()
        self.stride = stride
        self.padding = padding
        if isinstance(kernel_size, int):
            kernel_size=(kernel_size, kernel_size)
        self.weight = nn.Parameter(torch.randn((out_channels, in_channels) + kernel_size))
        self.bias = nn.Parameter(torch.randn((out_channels ,1 ,1)))
        
        
    def forward(self, x):
        return conv(x, self.weight, self.stride, self.padding) + self.bias

In [63]:
my_conv = MyConv2D(2, 2, 2)
X = torch.randn((1, 2, 3, 3))
print(my_conv.forward(X))

tensor([[[[ 2.0390,  1.8059],
          [ 2.6420,  1.2737]],

         [[-2.7758, -1.6841],
          [ 2.3810, -2.4902]]]], grad_fn=<AddBackward0>)
