In [1]:
import torch
from torch import nn
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import torch.optim as optim
import numpy as np

# Preprocessing: turn each image into a tensor, then normalize all three
# channels with mean 0.5 and std 0.5.
# NOTE(review): an earlier comment described these values as the CIFAR10
# mean and std; they are the same 0.5 for every channel - confirm whether
# per-channel dataset statistics were intended.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# CIFAR10 splits (train first, then test), stored under ./data and
# downloaded on demand; both share the preprocessing pipeline above.
train_dataset, test_dataset = (
    datasets.CIFAR10(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Mini-batches of 64. Only the training loader shuffles; the test loader
# keeps dataset order.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

# Build a ResNet18 classifier with a task-specific output layer
def resnet18(pretrained=True, num_classes=10):
    """Return a torchvision ResNet18 whose final layer emits ``num_classes`` logits.

    Args:
        pretrained: Forwarded unchanged to ``models.resnet18``.
        num_classes: Output size of the replacement fully connected layer.
            Defaults to 10, the value this script uses for CIFAR10.

    Returns:
        The model with ``model.fc`` replaced by a freshly initialised
        ``nn.Linear(in_features, num_classes)``.
    """
    # NOTE(review): recent torchvision releases document a ``weights=``
    # argument as the replacement for ``pretrained=``; the original keyword is
    # kept here so behaviour does not depend on the installed version - confirm.
    model = models.resnet18(pretrained=pretrained)
    # Replace the classification head; the input width is taken from the
    # existing layer so the backbone is left untouched.
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, num_classes)
    return model

# Two separate ResNet18 instances:
#   model_selector  - scores test cases so the most valuable 5% can be chosen
#   model_predictor - classifies the chosen 5%
model_selector = resnet18(pretrained=True)
model_predictor = resnet18(pretrained=True)

# Run on the GPU when CUDA is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
model_selector, model_predictor = model_selector.to(device), model_predictor.to(device)

# Shared loss function, plus one Adam optimizer per model (learning rate 1e-3).
criterion = nn.CrossEntropyLoss()
optimizer_selector = optim.Adam(params=model_selector.parameters(), lr=1e-3)
optimizer_predictor = optim.Adam(params=model_predictor.parameters(), lr=1e-3)

# Train a model
def train_model(model, dataloader, optimizer, epochs=10, loss_fn=None, target_device=None):
    """Train ``model`` on ``dataloader`` and report the mean loss of each epoch.

    Args:
        model: Module to optimise; it is switched to training mode.
        dataloader: Iterable yielding ``(images, labels)`` batches.
        optimizer: Optimizer already bound to ``model``'s parameters.
        epochs: Number of full passes over ``dataloader``.
        loss_fn: Callable ``loss_fn(outputs, labels)``. When ``None`` the
            module-level ``criterion`` is used.
        target_device: Device the batches are moved to. When ``None`` the
            module-level ``device`` is used.

    Returns:
        A list with one float per epoch: the mean of the per-batch losses,
        or ``nan`` for an epoch in which ``dataloader`` yielded no batches.
    """
    # Only touch the module-level defaults when the caller passed nothing, so
    # the function can also be used without those globals being defined.
    if loss_fn is None:
        loss_fn = criterion
    if target_device is None:
        target_device = device

    model.train()
    epoch_losses = []
    for epoch in range(epochs):
        running_loss = 0.0
        num_batches = 0
        for images, labels in dataloader:
            images, labels = images.to(target_device), labels.to(target_device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = loss_fn(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1
        # Report the epoch average rather than whichever batch came last, and
        # avoid reading an unbound ``loss`` when the loader was empty.
        mean_loss = running_loss / num_batches if num_batches else float('nan')
        epoch_losses.append(mean_loss)
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {mean_loss:.4f}')
    return epoch_losses

# Fit the selector model on the training split.
train_model(model=model_selector, dataloader=train_loader, optimizer=optimizer_selector)

# Fit the predictor model: first on the training split, then on the test split.
# NOTE(review): the predictor is later evaluated on samples drawn from this
# same test split, so the reported accuracy is measured on data the model has
# already been trained on - confirm this is the intended experimental setup.
train_model(model=model_predictor, dataloader=train_loader, optimizer=optimizer_predictor)
train_model(model=model_predictor, dataloader=test_loader, optimizer=optimizer_predictor)

# Score every test sample with the selector model's confidence, i.e. the
# largest softmax probability it assigns to any class.
model_selector.eval()
uncertainties = []  # despite the name, this stores confidence: lower = less certain
labels = []
with torch.no_grad():
    for x, y in test_loader:
        x, y = x.to(device), y.to(device)
        output = model_selector(x)
        probabilities = torch.softmax(output, dim=1)
        max_prob, _ = torch.max(probabilities, dim=1)
        uncertainties.extend(max_prob.cpu().numpy())
        labels.extend(y.cpu().numpy())

# Work on arrays from here on so indexing and comparisons are well defined.
uncertainties = np.asarray(uncertainties)
labels = np.asarray(labels)

# Pick the 5% of test cases with the lowest confidence.
# test_loader does not shuffle, so position i here is index i of test_dataset.
threshold_index = int(len(uncertainties) * 0.05)
if threshold_index > 0:
    # argpartition puts the threshold_index smallest values in the leading
    # slots, which yields exactly that many samples even when values tie.
    # (Partitioning at -threshold_index and then reading slot threshold_index
    # returns an arbitrary element, not the 5% cut-off.)
    selected_indices = np.sort(
        np.argpartition(uncertainties, threshold_index - 1)[:threshold_index]
    )
    # Highest confidence among the selected samples (inclusive cut-off).
    threshold = uncertainties[selected_indices].max()
else:
    # Too few samples for 5% to round to at least one: select nothing.
    selected_indices = np.empty(0, dtype=np.intp)
    threshold = float('-inf')

# Classify the selected test cases with the predictor model.
# train_model leaves the model in training mode, so switch to eval mode first:
# otherwise batch-norm layers would normalise each single-sample batch with
# its own statistics instead of the learned running statistics.
model_predictor.eval()
correct_count = 0
total_count = 0
with torch.no_grad():
    for idx in selected_indices:
        img, label = test_dataset[int(idx)]  # image tensor and its integer label
        img = img.unsqueeze(0).to(device)  # add a batch dimension and move to the device
        output = model_predictor(img)
        _, predicted = torch.max(output, 1)
        if predicted.item() == label:
            correct_count += 1
        total_count += 1

# Fraction of selected test cases classified correctly; nan when nothing was
# selected, instead of dividing by zero.
accuracy = correct_count / total_count if total_count else float('nan')
print(f"正确分类率: {accuracy:.4f}")

Files already downloaded and verified
Files already downloaded and verified




OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 23.64 GiB of which 15.56 MiB is free. Process 1582390 has 548.00 MiB memory in use. Process 1590575 has 1.05 GiB memory in use. Process 1666952 has 490.00 MiB memory in use. Process 1671087 has 490.00 MiB memory in use. Process 1687277 has 490.00 MiB memory in use. Process 1718204 has 490.00 MiB memory in use. Process 1760312 has 490.00 MiB memory in use. Process 1815573 has 490.00 MiB memory in use. Process 1848696 has 490.00 MiB memory in use. Process 1870001 has 490.00 MiB memory in use. Process 1920080 has 490.00 MiB memory in use. Process 2073050 has 490.00 MiB memory in use. Process 2125588 has 490.00 MiB memory in use. Process 2857264 has 6.41 GiB memory in use. Process 3103881 has 514.00 MiB memory in use. Process 3224424 has 514.00 MiB memory in use. Process 3317903 has 996.00 MiB memory in use. Process 3341970 has 516.00 MiB memory in use. Process 3409199 has 3.60 GiB memory in use. Process 3426486 has 3.60 GiB memory in use. Process 3435035 has 612.00 MiB memory in use. Of the allocated memory 123.21 MiB is allocated by PyTorch, and 14.79 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF