# 1.mobienetv2

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torchvision.models import mobilenet_v2

# 定义数据转换
transform = transforms.Compose([
    transforms.Resize((224, 224)),   # 将 28x28 调整为 224x224 适用于 MobileNetV2
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.repeat(3, 1, 1)), 
    transforms.Normalize((0.1307,), (0.3081,))  # MNIST 数据集标准化参数
])

# 加载 MNIST 数据集
dataset_path = './data'
train_dataset = datasets.MNIST(root=dataset_path, train=True, download=True, transform=transform)

# 只使用部分训练数据（例如 1000 个样本）
train_subset_size = 1000  # 只使用前 1000 个样本
train_subset = torch.utils.data.Subset(train_dataset, range(train_subset_size))
train_loader = torch.utils.data.DataLoader(train_subset, batch_size=8, shuffle=True, num_workers=2)
# 加载 MobileNetV2 模型
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = mobilenet_v2(weights=None)  # 不使用预训练权重
model.classifier[1] = nn.Linear(model.last_channel, 10)  # MNIST 有 10 个类别
model = model.to(device)
print(f"Model is using device: {device}")
# 定义损失函数和优化器
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 训练模型
epochs = 3
model.train()
for epoch in range(epochs):
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_loader, 0):
        inputs, labels = inputs.to(device), labels.to(device)

        # 前向传播
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # 反向传播
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 100 == 99:  # 每 100 个 batch 打印一次 loss
            print(f"[{epoch + 1}, {i + 1}] loss: {running_loss / 100:.3f}")
            running_loss = 0.0

print('Finished Training')
torch.save(model.state_dict(), 'mobilenet_v2_mnist.pth')  # 保存模型


Model is using device: cuda
[1, 100] loss: 1.665
[2, 100] loss: 0.737
[3, 100] loss: 0.371
Finished Training


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torchvision.models import mobilenet_v2

# 定义数据转换
transform = transforms.Compose([
    transforms.Resize((224, 224)),   # 将 28x28 调整为 224x224 适用于 MobileNetV2
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.repeat(3, 1, 1)), 
    transforms.Normalize((0.1307,), (0.3081,))  # MNIST 数据集标准化参数
])

# 加载 MNIST 数据集
dataset_path = './data'
test_dataset = datasets.MNIST(root=dataset_path, train=False, download=True, transform=transform)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=2)

# 加载 MobileNetV2 模型
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = mobilenet_v2(weights=None)
model.classifier[1] = nn.Linear(model.last_channel, 10)  # MNIST 有 10 个类别
model.load_state_dict(torch.load('mobilenet_v2_mnist.pth'))  # 加载之前保存的模型
model = model.to(device)

# 使用测试数据进行推理
model.eval()
test_iter = iter(test_loader)  # 迭代器，用于取出测试数据集中的一批数据
images, labels = next(test_iter)  # 取出一批数据
images, labels = images.to(device), labels.to(device)

# 推理
with torch.no_grad():  # 不需要计算梯度
    outputs = model(images)
    _, predicted = torch.max(outputs, 1)

# 打印实际标签和预测结果
print("Actual labels: ", labels[:10].cpu().numpy())
print("Predic labels: ", predicted[:10].cpu().numpy())


  model.load_state_dict(torch.load('mobilenet_v2_mnist.pth'))  # 加载之前保存的模型


Actual labels:  [7 2 1 0 4 1 4 9 5 9]
Predic labels:  [7 2 1 0 4 1 4 9 5 9]


### 2. inceptionv3

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torchvision.models import inception_v3

# 定义数据转换
transform = transforms.Compose([
    transforms.Resize((299, 299)),  # 将 28x28 调整为 299x299 适用于 InceptionV3
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.repeat(3, 1, 1)),  # 将单通道转换为三通道
    transforms.Normalize((0.1307,), (0.3081,))  # MNIST 数据集标准化参数
])

# 加载 MNIST 数据集
train_dataset = torch.utils.data.Subset(datasets.MNIST(root='./data', train=True, download=False, transform=transform), range(1000))
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)

# 加载 InceptionV3 模型
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = inception_v3(weights=None)  # 不使用预训练权重
model.AuxLogits.fc = nn.Linear(model.AuxLogits.fc.in_features, 10)  # MNIST 有 10 个类别
model.fc = nn.Linear(model.fc.in_features, 10)
model = model.to(device)

# 定义损失函数和优化器
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 训练模型
epochs = 3
model.train()
for epoch in range(epochs):
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_loader, 0):
        inputs, labels = inputs.to(device), labels.to(device)

        # 前向传播
        optimizer.zero_grad()
        outputs, aux_outputs = model(inputs)
        loss1 = criterion(outputs, labels)
        loss2 = criterion(aux_outputs, labels)
        loss = loss1 + 0.4 * loss2  # InceptionV3 的损失函数结合了辅助分类器的输出

        # 反向传播
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 100 == 99:  # 每 100 个 batch 打印一次 loss
            print(f"[{epoch + 1}, {i + 1}] loss: {running_loss / 100:.3f}")
            running_loss = 0.0

print('Finished Training')
torch.save(model.state_dict(), 'inceptionv3_mnist.pth')  # 保存模型




Finished Training


In [None]:
    for i, (inputs, labels) in enumerate(train_loader, 0):
        inputs, labels = inputs.to(device), labels.to(device)

        # 前向传播
        optimizer.zero_grad()
        outputs, aux_outputs = model(inputs)
        loss1 = criterion(outputs, labels)
        loss2 = criterion(aux_outputs, labels)
        loss = loss1 + 0.4 * loss2  # InceptionV3 的损失函数结合了辅助分类器的输出

        # 反向传播
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 100 == 99:  # 每 100 个 batch 打印一次 loss
            print(f"[{epoch + 1}, {i + 1}] loss: {running_loss / 100:.3f}")
            running_loss = 0.0

print('Finished Training')
torch.save(model.state_dict(), 'inceptionv3_mnist.pth')  # 保存模型


In [3]:
import torch
import torch.nn as nn
from torchvision import datasets, transforms
from torchvision.models import inception_v3

# 定义数据转换
transform = transforms.Compose([
    transforms.Resize((299, 299)),  # 将 28x28 调整为 299x299 适用于 InceptionV3
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.repeat(3, 1, 1)),  # 将单通道转换为三通道
    transforms.Normalize((0.1307,), (0.3081,))  # MNIST 数据集标准化参数
])

# 加载测试数据集
test_dataset = datasets.MNIST(root='./data', train=False, download=False, transform=transform)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=2)

# 加载 InceptionV3 模型
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = inception_v3(weights=None)  # 不使用预训练权重
model.AuxLogits.fc = nn.Linear(model.AuxLogits.fc.in_features, 10)  # MNIST 有 10 个类别
model.fc = nn.Linear(model.fc.in_features, 10)
model.load_state_dict(torch.load('inceptionv3_mnist.pth'))
model.eval()
model.to(device)

# 推理
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the test images: {100 * correct / total:.2f}%')


  model.load_state_dict(torch.load('inceptionv3_mnist.pth'))


Accuracy of the network on the test images: 85.97%


### 3.inceptionv4

In [6]:
pip install pretrainedmodels

Collecting pretrainedmodels
[0m  Downloading pretrainedmodels-0.7.4.tar.gz (58 kB)
  Preparing metadata (setup.py) ... [?25ldone
Collecting munch (from pretrainedmodels)
  Downloading munch-4.0.0-py2.py3-none-any.whl.metadata (5.9 kB)
Downloading munch-4.0.0-py2.py3-none-any.whl (9.9 kB)
Building wheels for collected packages: pretrainedmodels
  Building wheel for pretrainedmodels (setup.py) ... [?25ldone
[?25h  Created wheel for pretrainedmodels: filename=pretrainedmodels-0.7.4-py3-none-any.whl size=60945 sha256=255f9130517ee94424cab8f930684269c2b08b24bb2dda5704f43d42f3eaefed
  Stored in directory: /home/fushaomin/.cache/pip/wheels/4c/01/56/40a48f75dbdfe167a0cb70d3b48913369a00ec5c4e9fed5f2b
Successfully built pretrainedmodels
Installing collected packages: munch, pretrainedmodels
Successfully installed munch-4.0.0 pretrainedmodels-0.7.4
Note: you may need to restart the kernel to use updated packages.


In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torchvision.models import inception_v3
import pretrainedmodels

# 定义数据转换
transform = transforms.Compose([
    transforms.Resize((299, 299)),  # 将 28x28 调整为 299x299 适用于 InceptionV4
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.repeat(3, 1, 1)),  # 将单通道转换为三通道
    transforms.Normalize((0.1307,), (0.3081,))  # MNIST 数据集标准化参数
])

# 加载 MNIST 数据集
train_dataset = torch.utils.data.Subset(datasets.MNIST(root='./data', train=True, download=False, transform=transform), range(1000))
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)

# 加载 InceptionV4 模型
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = pretrainedmodels.__dict__['inceptionv4'](num_classes=1000, pretrained=None)  # 不使用预训练权重
model.last_linear = nn.Linear(model.last_linear.in_features, 10)  # MNIST 有 10 个类别
model = model.to(device)

# 定义损失函数和优化器
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 训练模型
epochs = 3
model.train()
for epoch in range(epochs):
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_loader, 0):
        inputs, labels = inputs.to(device), labels.to(device)

        # 前向传播
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # 反向传播
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 100 == 99:  # 每 100 个 batch 打印一次 loss
            print(f"[{epoch + 1}, {i + 1}] loss: {running_loss / 100:.3f}")
            running_loss = 0.0

print('Finished Training')
torch.save(model.state_dict(), 'inceptionv4_mnist.pth')  # 保存模型


Finished Training


In [9]:
import torch
import torch.nn as nn
from torchvision import datasets, transforms
import pretrainedmodels

# 定义数据转换
transform = transforms.Compose([
    transforms.Resize((299, 299)),  # 将 28x28 调整为 299x299 适用于 InceptionV4
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.repeat(3, 1, 1)),  # 将单通道转换为三通道
    transforms.Normalize((0.1307,), (0.3081,))  # MNIST 数据集标准化参数
])

# 加载测试数据集
test_dataset = datasets.MNIST(root='./data', train=False, download=False, transform=transform)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=2)

# 加载 InceptionV4 模型
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = pretrainedmodels.__dict__['inceptionv4'](num_classes=1000, pretrained=None)
model.last_linear = nn.Linear(model.last_linear.in_features, 10)  # 不使用预训练权重
model.load_state_dict(torch.load('inceptionv4_mnist.pth'))
model.eval()
model.to(device)

# 推理
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the test images: {100 * correct / total:.2f}%')


  model.load_state_dict(torch.load('inceptionv4_mnist.pth'))


Accuracy of the network on the test images: 66.31%


### 4. Inception-ResNet-V2 

In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
import pretrainedmodels

# 定义数据转换
transform = transforms.Compose([
    transforms.Resize((299, 299)),  # 将 28x28 调整为 299x299 适用于 Inception-ResNet-V2
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.repeat(3, 1, 1)),  # 将单通道转换为三通道
    transforms.Normalize((0.1307,), (0.3081,))  # MNIST 数据集标准化参数
])

# 加载 MNIST 数据集
train_dataset = torch.utils.data.Subset(datasets.MNIST(root='./data', train=True, download=False, transform=transform), range(1000))
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)

# 加载 Inception-ResNet-V2 模型
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = pretrainedmodels.__dict__['inceptionresnetv2'](num_classes=1000, pretrained=None)  # 不使用预训练权重
model.last_linear = nn.Linear(model.last_linear.in_features, 10)  # MNIST 有 10 个类别
model = model.to(device)

# 定义损失函数和优化器
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 训练模型
epochs = 3
model.train()
for epoch in range(epochs):
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_loader, 0):
        inputs, labels = inputs.to(device), labels.to(device)

        # 前向传播
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # 反向传播
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 100 == 99:  # 每 100 个 batch 打印一次 loss
            print(f"[{epoch + 1}, {i + 1}] loss: {running_loss / 100:.3f}")
            running_loss = 0.0

print('Finished Training')
torch.save(model.state_dict(), 'inceptionresnetv2_mnist.pth')  # 保存模型


Finished Training


In [None]:
import torch
import torch.nn as nn
from torchvision import datasets, transforms
import pretrainedmodels

# 定义数据转换
transform = transforms.Compose([
    transforms.Resize((299, 299)),  # 将 28x28 调整为 299x299 适用于 Inception-ResNet-V2
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.repeat(3, 1, 1)),  # 将单通道转换为三通道
    transforms.Normalize((0.1307,), (0.3081,))  # MNIST 数据集标准化参数
])

# 加载测试数据集
test_dataset = datasets.MNIST(root='../data', train=False, download=False, transform=transform)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=2)
        
# 加载 Inception-ResNet-V2 模型
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = pretrainedmodels.__dict__['inceptionresnetv2'](num_classes=1000, pretrained=None)  # 不使用预训练权重
model.last_linear = nn.Linear(model.last_linear.in_features, 10)  # MNIST 有 10 个类别
model.load_state_dict(torch.load('inceptionresnetv2_mnist.pth'))
model.eval()
model.to(device)

# 推理
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the test images: {100 * correct / total:.2f}%')


  model.load_state_dict(torch.load('inceptionresnetv2_mnist.pth'))


Accuracy of the network on the test images: 80.08%


### 5.ResNet-V2-50 

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
import pretrainedmodels

# 定义数据转换
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # 将 28x28 调整为 224x224 适用于 ResNet-V2-50
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.repeat(3, 1, 1)),  # 将单通道转换为三通道
    transforms.Normalize((0.1307,), (0.3081,))  # MNIST 数据集标准化参数
])

# 加载 MNIST 数据集
train_dataset = torch.utils.data.Subset(datasets.MNIST(root='../data', train=True, download=False, transform=transform), range(1000))
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)

# 加载 ResNet-V2-50 模型
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = pretrainedmodels.__dict__['resnet50'](num_classes=1000, pretrained=None)  # 不使用预训练权重
model.last_linear = nn.Linear(model.last_linear.in_features, 10)  # MNIST 有 10 个类别
model = model.to(device)

# 定义损失函数和优化器
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 训练模型
epochs = 3
model.train()
for epoch in range(epochs):
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_loader, 0):
        inputs, labels = inputs.to(device), labels.to(device)

        # 前向传播
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # 反向传播
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 100 == 99:  # 每 100 个 batch 打印一次 loss
            print(f"[{epoch + 1}, {i + 1}] loss: {running_loss / 100:.3f}")
            running_loss = 0.0

print('Finished Training')
torch.save(model.state_dict(), 'resnetv2_50_mnist.pth')  # 保存模型




Finished Training


In [14]:
import torch
import torch.nn as nn
from torchvision import datasets, transforms
import pretrainedmodels

# 定义数据转换
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # 将 28x28 调整为 224x224 适用于 ResNet-V2-50
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.repeat(3, 1, 1)),  # 将单通道转换为三通道
    transforms.Normalize((0.1307,), (0.3081,))  # MNIST 数据集标准化参数
])

# 加载测试数据集
test_dataset = datasets.MNIST(root='./data', train=False, download=False, transform=transform)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=2)

# 加载 ResNet-V2-50 模型
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = pretrainedmodels.__dict__['resnet50'](num_classes=1000, pretrained=None)  # 不使用预训练权重
model.last_linear = nn.Linear(model.last_linear.in_features, 10)  # MNIST 有 10 个类别
model.load_state_dict(torch.load('resnetv2_50_mnist.pth'))
model.eval()
model = model.to(device)

# 推理
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the test images: {100 * correct / total:.2f}%')


  model.load_state_dict(torch.load('resnetv2_50_mnist.pth'))


Accuracy of the network on the test images: 36.41%


### 6.ResNet-V2-152 

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
import pretrainedmodels

# 定义数据转换
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # 将 28x28 调整为 224x224 适用于 ResNet-V2-152
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.repeat(3, 1, 1)),  # 将单通道转换为三通道
    transforms.Normalize((0.1307,), (0.3081,))  # MNIST 数据集标准化参数
])

# 加载 MNIST 数据集
train_dataset = torch.utils.data.Subset(datasets.MNIST(root='../data', train=True, download=False, transform=transform), range(1001))
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)

# 加载 ResNet-V2-152 模型
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = pretrainedmodels.__dict__['resnet152'](num_classes=1000, pretrained=None)  # 不使用预训练权重
model.last_linear = nn.Linear(model.last_linear.in_features, 10)  # MNIST 有 10 个类别
model = model.to(device)

# 定义损失函数和优化器
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 训练模型
epochs = 3
model.train()
for epoch in range(epochs):
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_loader, 0):
        inputs, labels = inputs.to(device), labels.to(device)

        # 前向传播
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # 反向传播
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 100 == 99:  # 每 100 个 batch 打印一次 loss
            print(f"[{epoch + 1}, {i + 1}] loss: {running_loss / 100:.3f}")
            running_loss = 0.0

print('Finished Training')
torch.save(model.state_dict(), 'resnetv2_152_mnist.pth')  # 保存模型


Finished Training


In [18]:
import torch
import torch.nn as nn
from torchvision import datasets, transforms
import pretrainedmodels

# 定义数据转换
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # 将 28x28 调整为 224x224 适用于 ResNet-V2-152
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.repeat(3, 1, 1)),  # 将单通道转换为三通道
    transforms.Normalize((0.1307,), (0.3081,))  # MNIST 数据集标准化参数
])

# 加载测试数据集
test_dataset = datasets.MNIST(root='./data', train=False, download=False, transform=transform)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=2)

# 加载 ResNet-V2-152 模型
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = pretrainedmodels.__dict__['resnet152'](num_classes=1000, pretrained=None)  # 不使用预训练权重
model.last_linear = nn.Linear(model.last_linear.in_features, 10)  # MNIST 有 10 个类别
model.load_state_dict(torch.load('resnetv2_152_mnist.pth'))
model.eval()
model = model.to(device)

# 推理
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the test images: {100 * correct / total:.2f}%')


  model.load_state_dict(torch.load('resnetv2_152_mnist.pth'))


Accuracy of the network on the test images: 82.89%


### 7.VGG-16 

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models

# 定义数据转换
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # 将 28x28 调整为 224x224 适用于 VGG-16
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.repeat(3, 1, 1)),  # 将单通道转换为三通道
    transforms.Normalize((0.1307,), (0.3081,))  # MNIST 数据集标准化参数
])

# 加载 MNIST 数据集
train_dataset = torch.utils.data.Subset(datasets.MNIST(root='../data', train=True, download=False, transform=transform), range(1000))
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)

# 加载 VGG-16 模型
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = models.vgg16(pretrained=False)  # 不使用预训练权重
model.classifier[6] = nn.Linear(model.classifier[6].in_features, 10)  # MNIST 有 10 个类别
model = model.to(device)

# 定义损失函数和优化器
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 训练模型
epochs = 3
model.train()
for epoch in range(epochs):
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_loader, 0):
        inputs, labels = inputs.to(device), labels.to(device)

        # 前向传播
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # 反向传播
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 100 == 99:  # 每 100 个 batch 打印一次 loss
            print(f"[{epoch + 1}, {i + 1}] loss: {running_loss / 100:.3f}")
            running_loss = 0.0

print('Finished Training')
torch.save(model.state_dict(), 'vgg16_mnist.pth')  # 保存模型


Finished Training


In [20]:
import torch
import torch.nn as nn
from torchvision import datasets, transforms, models

# 定义数据转换
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # 将 28x28 调整为 224x224 适用于 VGG-16
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.repeat(3, 1, 1)),  # 将单通道转换为三通道
    transforms.Normalize((0.1307,), (0.3081,))  # MNIST 数据集标准化参数
])

# 加载测试数据集
test_dataset = datasets.MNIST(root='./data', train=False, download=False, transform=transform)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=2)

# 加载 VGG-16 模型
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = models.vgg16(pretrained=False)
model.classifier[6] = nn.Linear(model.classifier[6].in_features, 10)  # MNIST 有 10 个类别
model.load_state_dict(torch.load('vgg16_mnist.pth'))
model.eval()
model = model.to(device)

# 推理
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the test images: {100 * correct / total:.2f}%')


  model.load_state_dict(torch.load('vgg16_mnist.pth'))


Accuracy of the network on the test images: 76.19%


### 图片准备

In [27]:
from PIL import Image
import os

# 生成低分辨率图像
class LowResolutionGenerator:
    def __init__(self, input_dir, output_dir):
        self.input_dir = input_dir
        self.output_dir = output_dir
        os.makedirs(output_dir, exist_ok=True)

    def generate(self):
        for img_file in os.listdir(self.input_dir):
            if img_file.endswith('.jpg') or img_file.endswith('.png'):
                img_path = os.path.join(self.input_dir, img_file)
                high_res_image = Image.open(img_path).convert('L')  # 转换为灰度图像
                low_res_image = high_res_image.resize(
                    (high_res_image.width // 2, high_res_image.height // 2), Image.BICUBIC
                )
                low_res_image = low_res_image.resize(
                    (high_res_image.width, high_res_image.height), Image.BICUBIC
                )
                low_res_path = os.path.join(self.output_dir, img_file)
                low_res_image.save(low_res_path)

# 使用示例
input_directory = './data/Set5'  # 高分辨率图像输入目录
output_directory = './data/Set5_LR'  # 低分辨率图像输出目录

lr_generator = LowResolutionGenerator(input_directory, output_directory)
lr_generator.generate()

print('Low-resolution images generated successfully!')


Low-resolution images generated successfully!


### 8.SRCNN

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader
from PIL import Image
import os

# 定义数据转换
transform = transforms.Compose([
    transforms.ToTensor(),  # 转换为 Tensor
])

# 修改 Set5 数据集类，添加 resize 步骤
class Set5Dataset(torch.utils.data.Dataset):
    def __init__(self, lr_dir, hr_dir, transform=None):
        self.lr_dir = lr_dir
        self.hr_dir = hr_dir
        self.transform = transform
        self.image_files = [f for f in os.listdir(lr_dir) if f.endswith('.png') or f.endswith('.jpg')]

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        lr_img_name = os.path.join(self.lr_dir, self.image_files[idx])
        hr_img_name = os.path.join(self.hr_dir, self.image_files[idx])
        low_res_image = Image.open(lr_img_name).convert('L')  # 转换为灰度图像
        high_res_image = Image.open(hr_img_name).convert('L')  # 转换为灰度图像

        # 调整图像大小为统一尺寸，例如 256x256
        low_res_image = low_res_image.resize((256, 256), Image.BICUBIC)
        high_res_image = high_res_image.resize((256, 256), Image.BICUBIC)

        if self.transform:
            high_res_image = self.transform(high_res_image)
            low_res_image = self.transform(low_res_image)
        return low_res_image, high_res_image  # 输入是低分辨率图像，目标是高分辨率图像


# 创建数据集和数据加载器
train_dataset = Set5Dataset(lr_dir='../data/Set5_LR', hr_dir='../data/Set5', transform=transform)
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=2)

# 定义 SRCNN 模型
class SRCNN(nn.Module):
    def __init__(self):
        super(SRCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 64, kernel_size=9, padding=4)
        self.conv2 = nn.Conv2d(64, 32, kernel_size=5, padding=2)
        self.conv3 = nn.Conv2d(32, 1, kernel_size=5, padding=2)

    def forward(self, x):
        x = nn.functional.relu(self.conv1(x))
        x = nn.functional.relu(self.conv2(x))
        x = self.conv3(x)
        return x

# 加载 SRCNN 模型
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SRCNN().to(device)

# 定义损失函数和优化器
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 训练模型
epochs = 3
model.train()
for epoch in range(epochs):
    running_loss = 0.0
    for i, (inputs, targets) in enumerate(train_loader, 0):
        inputs, targets = inputs.to(device), targets.to(device)

        # 前向传播
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)  # 使用输入图像和输出图像之间的均方误差作为损失

        # 反向传播
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 10 == 9:  # 每 10 个 batch 打印一次 loss
            print(f"[{epoch + 1}, {i + 1}] loss: {running_loss / 10:.3f}")
            running_loss = 0.0

print('Finished Training')
torch.save(model.state_dict(), 'srcnn_set5.pth')  # 保存模型


Finished Training


In [None]:
import torch
import torch.nn as nn
from torchvision import transforms
from PIL import Image
import os

# 定义数据转换
transform = transforms.Compose([
    transforms.Resize((512, 512)),  # 将图像缩小为1000x1000像素
    transforms.ToTensor(),  # 转换为 Tensor
])

# 定义 SRCNN 模型
class SRCNN(nn.Module):
    def __init__(self):
        super(SRCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 64, kernel_size=9, padding=4)
        self.conv2 = nn.Conv2d(64, 32, kernel_size=5, padding=2)
        self.conv3 = nn.Conv2d(32, 1, kernel_size=5, padding=2)

    def forward(self, x):
        x = nn.functional.relu(self.conv1(x))
        x = nn.functional.relu(self.conv2(x))
        x = self.conv3(x)
        return x

# 加载模型
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SRCNN().to(device)
model.load_state_dict(torch.load('srcnn_set5.pth'))
model.eval()

# 加载测试图像并进行推理
test_dir = '../data/Set5_LR'  # 低分辨率图像文件夹
test_images = [f for f in os.listdir(test_dir) if f.endswith('.png') or f.endswith('.jpg')]

correct = 0
total = 0

for img_name in test_images:
    lr_img_path = os.path.join(test_dir, img_name)
    low_res_image = Image.open(lr_img_path).convert('L')  # 转换为灰度图像
    input_tensor = transform(low_res_image).unsqueeze(0).to(device)

    # 推理
    with torch.no_grad():
        output = model(input_tensor)

    # 计算准确率（这里假设有高分辨率的 ground truth）
    hr_img_path = os.path.join('../data/Set5', img_name)
    high_res_image = transform(Image.open(hr_img_path).convert('L')).unsqueeze(0).to(device)

    # 简单计算准确率（以 MSE 作为衡量标准，越小越好）
    mse = nn.functional.mse_loss(output, high_res_image).item()
    if mse < 0.01:  # 假设一个阈值来判断是否正确预测
        correct += 1
    total += 1

    # 显示预测结果
    print(f'Processed image: {img_name}, MSE: {mse:.4f}')

accuracy = 100 * correct / total
print(f'Accuracy of the network on the test images: {accuracy:.2f}%')
print('Finished Inference')


  model.load_state_dict(torch.load('srcnn_set5.pth'))


Processed image: 5.jpg, MSE: 0.0221
Processed image: 1.jpg, MSE: 0.0073
Processed image: 3.jpg, MSE: 0.0012
Processed image: 4.jpg, MSE: 0.0022
Processed image: 2.jpg, MSE: 0.0155
Accuracy of the network on the test images: 60.00%
Finished Inference


### 9.VGG-19 

In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, models
from PIL import Image

# 定义数据转换
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # 将图像调整为适合 VGG19 的输入大小
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # 使用 ImageNet 的均值和标准差进行标准化
])

# 自定义数据集加载器
class Set5Dataset(torch.utils.data.Dataset):
    def __init__(self, lr_dir, hr_dir, transform=None):
        self.lr_dir = lr_dir
        self.hr_dir = hr_dir
        self.image_names = [f for f in os.listdir(lr_dir) if os.path.isfile(os.path.join(lr_dir, f))]
        self.transform = transform

    def __len__(self):
        return len(self.image_names)

    def __getitem__(self, idx):
        lr_img_name = os.path.join(self.lr_dir, self.image_names[idx])
        hr_img_name = os.path.join(self.hr_dir, self.image_names[idx])
        lr_image = Image.open(lr_img_name).convert('RGB')
        hr_image = Image.open(hr_img_name).convert('RGB')
        if self.transform:
            lr_image = self.transform(lr_image)
            hr_image = self.transform(hr_image)
        return lr_image, hr_image

# 加载数据集（使用 Set5 数据集的低分辨率和高清图像）
train_dataset = Set5Dataset(lr_dir='../data/Set5_LR', hr_dir='../data/Set5', transform=transform)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=1, shuffle=True, num_workers=2)

# 加载 VGG19 模型
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = models.vgg19(pretrained=False)  # 不使用预训练权重
model.classifier[6] = nn.Linear(4096, 3 * 224 * 224)  # 输出大小与高清图像一致
model = model.to(device)

# 定义损失函数和优化器
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 训练模型
epochs = 3
model.train()
for epoch in range(epochs):
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_loader, 0):
        inputs = inputs.to(device)
        labels = labels.to(device)

        # 前向传播
        optimizer.zero_grad()
        outputs = model(inputs)
        outputs = outputs.view_as(labels)  # 调整输出形状与标签一致
        loss = criterion(outputs, labels)

        # 反向传播
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 100 == 99:  # 每 100 个 batch 打印一次 loss
            print(f"[{epoch + 1}, {i + 1}] loss: {running_loss / 100:.3f}")
            running_loss = 0.0

print('Finished Training')
torch.save(model.state_dict(), 'vgg19_set5.pth')  # 保存模型


Finished Training


In [None]:
import os
import torch
import torch.nn as nn
from torchvision import transforms, models
from PIL import Image

# 定义数据转换
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # 将图像调整为适合 VGG19 的输入大小
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # 使用 ImageNet 的均值和标准差进行标准化
])

# 加载 VGG19 模型
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = models.vgg19(pretrained=False)
model.classifier[6] = nn.Linear(4096, 3 * 224 * 224)
model = model.to(device)

# 加载训练好的模型权重
model.load_state_dict(torch.load('vgg19_set5.pth'))
model.eval()

# 推理函数
def infer_image(lr_image_path, hr_image_path):
    lr_image = Image.open(lr_image_path).convert('RGB')
    hr_image = Image.open(hr_image_path).convert('RGB')
    input_tensor = transform(lr_image).unsqueeze(0).to(device)
    hr_tensor = transform(hr_image).unsqueeze(0).to(device)  # 确保高分辨率图像也在相同的设备上
    
    with torch.no_grad():
        output = model(input_tensor)
        output = output.view(1, 3, 224, 224).to(device)
    
    # 计算 MSE 作为准确度的指标之一
    mse_loss = nn.MSELoss()(output, hr_tensor)
    print(f"MSE Loss for {lr_image_path}: {mse_loss.item()}")

# 推理数据集中的每张图像
lr_dir = '../data/Set5_LR'
hr_dir = '../data/Set5'
image_names = [f for f in os.listdir(lr_dir) if os.path.isfile(os.path.join(lr_dir, f))]

for img_name in image_names:
    lr_image_path = os.path.join(lr_dir, img_name)
    hr_image_path = os.path.join(hr_dir, img_name)
    infer_image(lr_image_path, hr_image_path)

print('Finished Inference')


  model.load_state_dict(torch.load('vgg19_set5.pth'))


MSE Loss for ./data/Set5_LR/5.jpg: 3.622990369796753
MSE Loss for ./data/Set5_LR/1.jpg: 0.9915202260017395
MSE Loss for ./data/Set5_LR/3.jpg: 1.3173009157180786
MSE Loss for ./data/Set5_LR/4.jpg: 2.0026438236236572
MSE Loss for ./data/Set5_LR/2.jpg: 0.8065729141235352
Finished Inference


### 10.ResNet-SRGAN 

In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torchvision.models import resnet18
from PIL import Image
from torch.utils.data import Dataset, DataLoader

# 定义数据集类
class ImageDataset(Dataset):
    def __init__(self, lr_dir, hr_dir, transform=None):
        self.lr_dir = lr_dir
        self.hr_dir = hr_dir
        self.image_names = [f for f in os.listdir(lr_dir) if os.path.isfile(os.path.join(lr_dir, f))]
        self.transform = transform

    def __len__(self):
        return len(self.image_names)

    def __getitem__(self, idx):
        img_name = self.image_names[idx]
        lr_image_path = os.path.join(self.lr_dir, img_name)
        hr_image_path = os.path.join(self.hr_dir, img_name)
        lr_image = Image.open(lr_image_path).convert('RGB')
        hr_image = Image.open(hr_image_path).convert('RGB')

        if self.transform:
            lr_image = self.transform(lr_image)
            hr_image = self.transform(hr_image)

        return lr_image, hr_image

# 定义数据转换
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # 将图像调整为适合 ResNet 的输入大小
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # 使用 ImageNet 的均值和标准差进行标准化
])

# 加载数据集
lr_dir = '../data/Set5_LR'
hr_dir = '../data/Set5'
dataset = ImageDataset(lr_dir, hr_dir, transform=transform)
train_loader = DataLoader(dataset, batch_size=2, shuffle=True, num_workers=2)

# 加载 ResNet 模型
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = resnet18(pretrained=False)
model.fc = nn.Linear(model.fc.in_features, 3 * 224 * 224)  # 输出与高分辨率图像大小匹配
model = model.to(device)

# 定义损失函数和优化器
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 训练模型
epochs = 3
model.train()
for epoch in range(epochs):
    running_loss = 0.0
    for i, (lr_images, hr_images) in enumerate(train_loader, 0):
        lr_images, hr_images = lr_images.to(device), hr_images.to(device)

        # 前向传播
        optimizer.zero_grad()
        outputs = model(lr_images)
        outputs = outputs.view(-1, 3, 224, 224)  # 将输出重塑为图像大小
        loss = criterion(outputs, hr_images)

        # 反向传播
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 10 == 9:  # 每 10 个 batch 打印一次 loss
            print(f"[Epoch {epoch + 1}, Batch {i + 1}] loss: {running_loss / 10:.3f}")
            running_loss = 0.0

print('Finished Training')
torch.save(model.state_dict(), 'resnet_srgan_set5.pth')  # 保存模型


Finished Training


In [None]:
import os
import torch
import torch.nn as nn
from torchvision import transforms
from PIL import Image

# 定义数据转换
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # 将图像调整为适合 ResNet-SRGAN 的输入大小
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # 使用 ImageNet 的均值和标准差进行标准化
])

# 加载 ResNet-SRGAN 模型
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = resnet18(pretrained=False)
model.fc = nn.Linear(model.fc.in_features, 3 * 224 * 224)  # 输出与高分辨率图像大小匹配
model.load_state_dict(torch.load('resnet_srgan_set5.pth'))  # 加载训练好的模型权重
model = model.to(device)
model.eval()

# 推理函数
def infer_image(lr_image_path, hr_image_path):
    lr_image = Image.open(lr_image_path).convert('RGB')
    hr_image = Image.open(hr_image_path).convert('RGB')
    input_tensor = transform(lr_image).unsqueeze(0).to(device)
    hr_tensor = transform(hr_image).unsqueeze(0).to(device)  # 确保高分辨率图像也在相同的设备上
    
    with torch.no_grad():
        output = model(input_tensor)
        output = output.view(1, 3, 224, 224).to(device)
    
    # 计算 MSE 作为准确度的指标之一
    mse_loss = nn.MSELoss()(output, hr_tensor)
    print(f"MSE Loss for {lr_image_path}: {mse_loss.item()}")

# 推理数据集中的每张图像
lr_dir = '../data/Set5_LR'
hr_dir = '../data/Set5'
image_names = [f for f in os.listdir(lr_dir) if os.path.isfile(os.path.join(lr_dir, f))]

for img_name in image_names:
    lr_image_path = os.path.join(lr_dir, img_name)
    hr_image_path = os.path.join(hr_dir, img_name)
    infer_image(lr_image_path, hr_image_path)

print('Finished Inference')


  model.load_state_dict(torch.load('resnet_srgan_set5.pth'))  # 加载训练好的模型权重


MSE Loss for ./data/Set5_LR/5.jpg: 5890.63671875
MSE Loss for ./data/Set5_LR/1.jpg: 233.42489624023438
MSE Loss for ./data/Set5_LR/3.jpg: 605.3394165039062
MSE Loss for ./data/Set5_LR/4.jpg: 3221.66943359375
MSE Loss for ./data/Set5_LR/2.jpg: 92.18882751464844
Finished Inference


### 11.ResNet-DPED

In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torchvision.models import resnet18
from PIL import Image
from torch.utils.data import Dataset, DataLoader

# 定义数据集类
class ImageDataset(Dataset):
    def __init__(self, lr_dir, hr_dir, transform=None):
        self.lr_dir = lr_dir
        self.hr_dir = hr_dir
        self.image_names = [f for f in os.listdir(lr_dir) if os.path.isfile(os.path.join(lr_dir, f))]
        self.transform = transform

    def __len__(self):
        return len(self.image_names)

    def __getitem__(self, idx):
        img_name = self.image_names[idx]
        lr_image_path = os.path.join(self.lr_dir, img_name)
        hr_image_path = os.path.join(self.hr_dir, img_name)
        lr_image = Image.open(lr_image_path).convert('RGB')
        hr_image = Image.open(hr_image_path).convert('RGB')

        if self.transform:
            lr_image = self.transform(lr_image)
            hr_image = self.transform(hr_image)

        return lr_image, hr_image

# 定义数据转换
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # 将图像调整为适合 ResNet-DPED 的输入大小
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # 使用 ImageNet 的均值和标准差进行标准化
])

# 加载数据集
lr_dir = '../data/Set5_LR'
hr_dir = '../data/Set5'
dataset = ImageDataset(lr_dir, hr_dir, transform=transform)
train_loader = DataLoader(dataset, batch_size=2, shuffle=True, num_workers=2)

# 加载 ResNet-DPED 模型
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = resnet18(pretrained=False)
model.fc = nn.Linear(model.fc.in_features, 3 * 224 * 224)  # 输出与高分辨率图像大小匹配
model = model.to(device)

# 定义损失函数和优化器
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 训练模型
epochs = 3
model.train()
for epoch in range(epochs):
    running_loss = 0.0
    for i, (lr_images, hr_images) in enumerate(train_loader, 0):
        lr_images, hr_images = lr_images.to(device), hr_images.to(device)

        # 前向传播
        optimizer.zero_grad()
        outputs = model(lr_images)
        outputs = outputs.view(-1, 3, 224, 224)  # 将输出重塑为图像大小
        loss = criterion(outputs, hr_images)

        # 反向传播
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 10 == 9:  # 每 10 个 batch 打印一次 loss
            print(f"[Epoch {epoch + 1}, Batch {i + 1}] loss: {running_loss / 10:.3f}")
            running_loss = 0.0

print('Finished Training')
torch.save(model.state_dict(), 'resnet_dped_set5.pth')  # 保存模型


Finished Training


In [None]:
import os
import torch
import torch.nn as nn
from torchvision import transforms
from torchvision.models import resnet18
from PIL import Image

# 定义数据转换
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # 将图像调整为适合 ResNet-DPED 的输入大小
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # 使用 ImageNet 的均值和标准差进行标准化
])

# 加载 ResNet-DPED 模型
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = resnet18(pretrained=False)
model.fc = nn.Linear(model.fc.in_features, 3 * 224 * 224)  # 输出与高分辨率图像大小匹配
model.load_state_dict(torch.load('resnet_dped_set5.pth'))  # 加载训练好的模型权重
model = model.to(device)
model.eval()

# 推理函数
def infer_image(lr_image_path, hr_image_path):
    lr_image = Image.open(lr_image_path).convert('RGB')
    hr_image = Image.open(hr_image_path).convert('RGB')
    input_tensor = transform(lr_image).unsqueeze(0).to(device)
    hr_tensor = transform(hr_image).unsqueeze(0).to(device)  # 确保高分辨率图像也在相同的设备上
    
    with torch.no_grad():
        output = model(input_tensor)
        output = output.view(1, 3, 224, 224).to(device)
    
    # 计算 MSE 作为准确度的指标之一
    mse_loss = nn.MSELoss()(output, hr_tensor)
    print(f"MSE Loss for {lr_image_path}: {mse_loss.item()}")

# 推理数据集中的每张图像
lr_dir = '../data/Set5_LR'
hr_dir = '../data/Set5'
image_names = [f for f in os.listdir(lr_dir) if os.path.isfile(os.path.join(lr_dir, f))]

for img_name in image_names:
    lr_image_path = os.path.join(lr_dir, img_name)
    hr_image_path = os.path.join(hr_dir, img_name)
    infer_image(lr_image_path, hr_image_path)

print('Finished Inference')


  model.load_state_dict(torch.load('resnet_dped_set5.pth'))  # 加载训练好的模型权重


MSE Loss for ./data/Set5_LR/5.jpg: 92389.9921875
MSE Loss for ./data/Set5_LR/1.jpg: 338.0931091308594
MSE Loss for ./data/Set5_LR/3.jpg: 300.0672607421875
MSE Loss for ./data/Set5_LR/4.jpg: 1889.5848388671875
MSE Loss for ./data/Set5_LR/2.jpg: 8488.494140625
Finished Inference


### 12.U-Net 

In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image

# 定义 U-Net 模型
class UNet(nn.Module):
    def __init__(self):
        super(UNet, self).__init__()
        # 定义编码器部分
        self.encoder1 = self.conv_block(3, 64)
        self.encoder2 = self.conv_block(64, 128)
        self.encoder3 = self.conv_block(128, 256)
        self.encoder4 = self.conv_block(256, 512)

        # 定义中间层
        self.middle = self.conv_block(512, 1024)

        # 定义解码器部分
        self.decoder4 = self.conv_block(1024 + 512, 512)
        self.decoder3 = self.conv_block(512 + 256, 256)
        self.decoder2 = self.conv_block(256 + 128, 128)
        self.decoder1 = self.conv_block(128 + 64, 64)

        # 定义输出层
        self.output_layer = nn.Conv2d(64, 3, kernel_size=1)

    def conv_block(self, in_channels, out_channels):
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        # 编码器部分
        enc1 = self.encoder1(x)
        enc2 = self.encoder2(self.downsample(enc1))
        enc3 = self.encoder3(self.downsample(enc2))
        enc4 = self.encoder4(self.downsample(enc3))

        # 中间层
        middle = self.middle(self.downsample(enc4))

        # 解码器部分
        dec4 = self.upsample(middle, enc4)
        dec4 = self.decoder4(torch.cat([dec4, enc4], dim=1))
        dec3 = self.upsample(dec4, enc3)
        dec3 = self.decoder3(torch.cat([dec3, enc3], dim=1))
        dec2 = self.upsample(dec3, enc2)
        dec2 = self.decoder2(torch.cat([dec2, enc2], dim=1))
        dec1 = self.upsample(dec2, enc1)
        dec1 = self.decoder1(torch.cat([dec1, enc1], dim=1))

        # 输出层
        return self.output_layer(dec1)

    def downsample(self, x):
        return nn.MaxPool2d(kernel_size=2, stride=2)(x)

    def upsample(self, x, target_feature_map):
        return nn.functional.interpolate(x, size=target_feature_map.shape[2:], mode='bilinear', align_corners=True)

# 定义数据集类
class ImageDataset(Dataset):
    def __init__(self, lr_dir, hr_dir, transform=None):
        self.lr_dir = lr_dir
        self.hr_dir = hr_dir
        self.image_names = [f for f in os.listdir(lr_dir) if os.path.isfile(os.path.join(lr_dir, f))]
        self.transform = transform

    def __len__(self):
        return len(self.image_names)

    def __getitem__(self, idx):
        img_name = self.image_names[idx]
        lr_image_path = os.path.join(self.lr_dir, img_name)
        hr_image_path = os.path.join(self.hr_dir, img_name)
        lr_image = Image.open(lr_image_path).convert('RGB')
        hr_image = Image.open(hr_image_path).convert('RGB')

        if self.transform:
            lr_image = self.transform(lr_image)
            hr_image = self.transform(hr_image)

        return lr_image, hr_image

# 定义数据转换
transform = transforms.Compose([
    transforms.Resize((256, 256)),  # 将图像调整为适合 U-Net 的输入大小
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

# 加载数据集
lr_dir = '../data/Set5_LR'
hr_dir = '../data/Set5'
dataset = ImageDataset(lr_dir, hr_dir, transform=transform)
train_loader = DataLoader(dataset, batch_size=2, shuffle=True, num_workers=2)

# 加载 U-Net 模型
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = UNet().to(device)

# 定义损失函数和优化器
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 训练模型
epochs = 3
model.train()
for epoch in range(epochs):
    running_loss = 0.0
    for i, (lr_images, hr_images) in enumerate(train_loader, 0):
        lr_images, hr_images = lr_images.to(device), hr_images.to(device)

        # 前向传播
        optimizer.zero_grad()
        outputs = model(lr_images)
        loss = criterion(outputs, hr_images)

        # 反向传播
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 10 == 9:  # 每 10 个 batch 打印一次 loss
            print(f"[Epoch {epoch + 1}, Batch {i + 1}] loss: {running_loss / 10:.3f}")
            running_loss = 0.0

print('Finished Training')
torch.save(model.state_dict(), 'unet_set5.pth')  # 保存模型


Finished Training


In [None]:
import os
import torch
import torch.nn as nn
from torchvision import transforms
from PIL import Image

# 定义数据转换
transform = transforms.Compose([
    transforms.Resize((256, 256)),  # 将图像调整为适合 U-Net 的输入大小
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

# 加载 U-Net 模型
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = UNet().to(device)
model.load_state_dict(torch.load('unet_set5.pth'))  # 加载训练好的模型权重
model.eval()

# 推理函数
def infer_image(lr_image_path, hr_image_path):
    lr_image = Image.open(lr_image_path).convert('RGB')
    hr_image = Image.open(hr_image_path).convert('RGB')
    input_tensor = transform(lr_image).unsqueeze(0).to(device)
    hr_tensor = transform(hr_image).unsqueeze(0).to(device)  # 确保高分辨率图像也在相同的设备上
    
    with torch.no_grad():
        output = model(input_tensor)
    
    # 计算 MSE 作为准确度的指标之一
    mse_loss = nn.MSELoss()(output, hr_tensor)
    print(f"MSE Loss for {lr_image_path}: {mse_loss.item()}")

# 推理数据集中的每张图像
lr_dir = '../data/Set5_LR'
hr_dir = '../data/Set5'
image_names = [f for f in os.listdir(lr_dir) if os.path.isfile(os.path.join(lr_dir, f))]

for img_name in image_names:
    lr_image_path = os.path.join(lr_dir, img_name)
    hr_image_path = os.path.join(hr_dir, img_name)
    infer_image(lr_image_path, hr_image_path)

print('Finished Inference')


  model.load_state_dict(torch.load('unet_set5.pth'))  # 加载训练好的模型权重


MSE Loss for ./data/Set5_LR/5.jpg: 0.46087774634361267
MSE Loss for ./data/Set5_LR/1.jpg: 0.524380624294281
MSE Loss for ./data/Set5_LR/3.jpg: 0.6777042150497437
MSE Loss for ./data/Set5_LR/4.jpg: 0.8753786087036133
MSE Loss for ./data/Set5_LR/2.jpg: 0.1537054032087326
Finished Inference


### 13.Nvidia-SPADE

In [47]:
import os
import sys
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from collections import OrderedDict
from options.train_options import TrainOptions
import data
from util.iter_counter import IterationCounter
from util.visualizer import Visualizer
from trainers.pix2pix_trainer import Pix2PixTrainer

# 定义数据集类
class ImageDataset(Dataset):
    def __init__(self, lr_dir, hr_dir, transform=None):
        self.lr_dir = lr_dir
        self.hr_dir = hr_dir
        self.image_names = [f for f in os.listdir(lr_dir) if os.path.isfile(os.path.join(lr_dir, f))]
        self.transform = transform

    def __len__(self):
        return len(self.image_names)

    def __getitem__(self, idx):
        img_name = self.image_names[idx]
        lr_image_path = os.path.join(self.lr_dir, img_name)
        hr_image_path = os.path.join(self.hr_dir, img_name)
        lr_image = Image.open(lr_image_path).convert('RGB')
        hr_image = Image.open(hr_image_path).convert('RGB')

        if self.transform:
            lr_image = self.transform(lr_image)
            hr_image = self.transform(hr_image)

        return lr_image, hr_image

# 定义数据转换
transform = transforms.Compose([
    transforms.Resize((256, 256)),  # 将图像调整为适合 SPADE 的输入大小
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

# 加载数据集
lr_dir = './data/Set5_LR'
hr_dir = './data/Set5'
dataset = ImageDataset(lr_dir, hr_dir, transform=transform)
train_loader = DataLoader(dataset, batch_size=2, shuffle=True, num_workers=2)

# parse options
opt = TrainOptions().parse()

# print options to help debugging
print(' '.join(sys.argv))

# create trainer for our model
trainer = Pix2PixTrainer(opt)

# create tool for counting iterations
iter_counter = IterationCounter(opt, len(train_loader))

# create tool for visualization
visualizer = Visualizer(opt)

# 训练模型
for epoch in iter_counter.training_epochs():
    iter_counter.record_epoch_start(epoch)
    for i, data_i in enumerate(train_loader, start=iter_counter.epoch_iter):
        iter_counter.record_one_iteration()

        # Training
        # train generator
        if i % opt.D_steps_per_G == 0:
            trainer.run_generator_one_step(data_i)

        # train discriminator
        trainer.run_discriminator_one_step(data_i)

        # Visualizations
        if iter_counter.needs_printing() and i % 10 == 9:
            losses = trainer.get_latest_losses()
            print(f"[Epoch {epoch + 1}, Batch {i + 1}] loss: {losses}")
            visualizer.print_current_errors(epoch, iter_counter.epoch_iter,
                                            losses, iter_counter.time_per_iter)
            visualizer.plot_current_errors(losses, iter_counter.total_steps_so_far)

        if iter_counter.needs_displaying():
            visuals = OrderedDict([('input_label', data_i[0]),
                                   ('synthesized_image', trainer.get_latest_generated()),
                                   ('real_image', data_i[1])])
            visualizer.display_current_results(visuals, epoch, iter_counter.total_steps_so_far)

        if iter_counter.needs_saving():
            print('saving the latest model (epoch %d, total_steps %d)' %
                  (epoch, iter_counter.total_steps_so_far))
            trainer.save('latest')
            iter_counter.record_current_iter()

    trainer.update_learning_rate(epoch)
    iter_counter.record_epoch_end()

    if epoch % opt.save_epoch_freq == 0 or \
       epoch == iter_counter.total_epochs:
        print('saving the model at the end of epoch %d, iters %d' %
              (epoch, iter_counter.total_steps_so_far))
        trainer.save('latest')
        trainer.save(epoch)

print('Training was successfully finished.')


ModuleNotFoundError: No module named 'options'

In [48]:
# 数据集
from torchvision.datasets import VOCSegmentation
from torchvision import transforms

# 数据增强与预处理
transform = transforms.Compose([
    transforms.Resize((128, 128)),  # 缩小图像加快训练
    transforms.ToTensor(),
])

# 加载 PASCAL VOC 数据集
train_dataset = VOCSegmentation(root='./data', year='2012', image_set='train', download=True, transform=transform)
val_dataset = VOCSegmentation(root='./data', year='2012', image_set='val', download=True, transform=transform)


Downloading http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar to ./data/VOCtrainval_11-May-2012.tar


  0%|          | 295k/2.00G [00:06<12:05:30, 45.9kB/s]


KeyboardInterrupt: 

In [None]:
# 图片清理

import os

def clean_dataset(voc_dir):
    annotations_dir = os.path.join(voc_dir, 'Annotations')
    images_dir = os.path.join(voc_dir, 'JPEGImages')
    imagesets_dir = os.path.join(voc_dir, 'ImageSets', 'Segmentation')

    # 1. 清理缺少图片的标注文件
    annotations = set(os.path.splitext(f)[0] for f in os.listdir(annotations_dir))
    images = set(os.path.splitext(f)[0] for f in os.listdir(images_dir))
    missing_images = annotations - images
    for missing in missing_images:
        os.remove(os.path.join(annotations_dir, f"{missing}.xml"))
        print(f"Deleted annotation: {missing}.xml")

    # 2. 更新 ImageSets 列表
    for split in ['train.txt', 'val.txt', 'test.txt']:
        split_file = os.path.join(imagesets_dir, split)
        if not os.path.exists(split_file):
            continue
        with open(split_file, 'r') as f:
            image_ids = [line.strip() for line in f]
        valid_ids = [img_id for img_id in image_ids if img_id in images]
        with open(split_file, 'w') as f:
            f.write('\n'.join(valid_ids))
            print(f"Updated {split} with {len(valid_ids)} valid images.")

# 执行清理
clean_dataset('./data/VOCdevkit/VOC2007')


Updated train.txt with 155 valid images.
Updated val.txt with 175 valid images.
Updated test.txt with 210 valid images.


In [54]:
# VOC2007数据集验证代码
import os

# 路径设置
data_dir = "./data/VOCdevkit/VOC2007"
annotations_dir = os.path.join(data_dir, "Annotations")
images_dir = os.path.join(data_dir, "JPEGImages")

# 验证是否所有标注文件都有对应的图片
annotation_files = set(os.path.splitext(f)[0] for f in os.listdir(annotations_dir))
image_files = set(os.path.splitext(f)[0] for f in os.listdir(images_dir))

missing_annotations = image_files - annotation_files
missing_images = annotation_files - image_files

print("Missing Annotations:", missing_annotations)
print("Missing Images:", missing_images)
# 验证 ImageSets/Segmentation 中的文件是否存在于 JPEGImages 中
imageset_file = os.path.join(data_dir, "ImageSets/Segmentation/train.txt")
with open(imageset_file, "r") as f:
    image_ids = [line.strip() for line in f]

missing_images = [img_id for img_id in image_ids if not os.path.exists(os.path.join(images_dir, f"{img_id}.jpg"))]
print("Missing images from ImageSets:", missing_images)


Missing Annotations: set()
Missing Images: set()
Missing images from ImageSets: []


In [58]:
import os

voc_root = './data/VOCdevkit/VOC2007'
annotations_dir = os.path.join(voc_root, 'Annotations')
images_dir = os.path.join(voc_root, 'JPEGImages')
imagesets_dir = os.path.join(voc_root, 'ImageSets', 'Segmentation')

# 检查核心目录是否存在
assert os.path.isdir(annotations_dir), "Annotations folder is missing!"
assert os.path.isdir(images_dir), "JPEGImages folder is missing!"
assert os.path.isdir(imagesets_dir), "ImageSets/Segmentation folder is missing!"

# 检查关键文件
train_file = os.path.join(imagesets_dir, 'train.txt')
val_file = os.path.join(imagesets_dir, 'val.txt')
assert os.path.isfile(train_file), "train.txt is missing!"
assert os.path.isfile(val_file), "val.txt is missing!"

print("Dataset structure is correct!")


Dataset structure is correct!


In [66]:
import os

# 数据路径
segmentation_dir = './data/VOCdevkit/VOC2007/SegmentationClass'
images_dir = './data/VOCdevkit/VOC2007/JPEGImages'
imagesets_file = './data/VOCdevkit/VOC2007/ImageSets/Segmentation/train.txt'

# 加载 train.txt 文件
with open(imagesets_file, 'r') as f:
    image_ids = [line.strip() for line in f]

# 筛选有对应 Mask 文件的图片
valid_ids = [img_id for img_id in image_ids if os.path.exists(os.path.join(segmentation_dir, f"{img_id}.png"))]

# 打印结果
print(f"Found {len(valid_ids)} valid images with masks.")
print(f"Missing masks: {len(image_ids) - len(valid_ids)}")

# 更新 train.txt 文件（可选）
with open(imagesets_file, 'w') as f:
    f.write('\n'.join(valid_ids))
    print(f"Updated {imagesets_file} with valid entries.")


Found 0 valid images with masks.
Missing masks: 155
Updated ./data/VOCdevkit/VOC2007/ImageSets/Segmentation/train.txt with valid entries.


### 14.|CNet

In [None]:
import os
import torch
import torch.nn as nn
from torchvision.datasets import VOCSegmentation
from torchvision import transforms
from torch.utils.data import DataLoader
import torchvision.transforms.functional as F_transforms

# 定义 ICNet 模型
class ICNet(nn.Module):
    def __init__(self, num_classes=21):
        super(ICNet, self).__init__()
        self.conv = nn.Conv2d(3, num_classes, kernel_size=3, padding=1)

    def forward(self, x):
        x = self.conv(x)
        return nn.functional.interpolate(x, size=(128, 128), mode="bilinear", align_corners=False)

# 数据加载器中的标签处理
def mask_transform(mask):
    mask = mask.resize((128, 128))
    mask = F_transforms.pil_to_tensor(mask).squeeze(0)
    mask = mask.long()

    # 将超出范围的值标记为无效值
    mask[(mask < 0) | (mask >= 21)] = -1
    return mask

# 主函数
def main():
    # 数据加载与预处理
    transform = transforms.Compose([
        transforms.Resize((128, 128)),
        transforms.ToTensor()
    ])
    dataset = VOCSegmentation(
        root='../data',
        year='2007',
        image_set='train',
        download=False,
        transform=transform,
        target_transform=mask_transform
    )
    data_loader = DataLoader(dataset, batch_size=4, shuffle=True)

    # 模型定义
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = ICNet(num_classes=21).to(device)
    print(model)
    criterion = nn.CrossEntropyLoss(ignore_index=-1)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # 训练循环
    for epoch in range(5):  # 假设训练 5 个 epoch
        model.train()
        total_loss = 0
        for batch_idx, (images, masks) in enumerate(data_loader):
            images, masks = images.to(device), masks.to(device)

            # 打印调试信息
            print("Images shape:", images.shape)
            print("Masks shape:", masks.shape)
            print("Unique mask values:", torch.unique(masks))

            # 前向传播
            outputs = model(images)
            print("Model output shape:", outputs.shape)

            loss = criterion(outputs, masks)

            # 反向传播与优化
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        print(f"Epoch [{epoch+1}/5], Loss: {total_loss / len(data_loader)}")

    # 保存模型
    torch.save(model.state_dict(), "icnet_model.pth")
    print("Model training complete and saved.")

if __name__ == "__main__":
    main()


ICNet(
  (conv): Conv2d(3, 21, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)
Images shape: torch.Size([4, 3, 128, 128])
Masks shape: torch.Size([4, 128, 128])
Unique mask values: tensor([-1,  0,  1,  4, 14, 15, 19], device='cuda:0')
Model output shape: torch.Size([4, 21, 128, 128])
Images shape: torch.Size([4, 3, 128, 128])
Masks shape: torch.Size([4, 128, 128])
Unique mask values: tensor([-1,  0,  1,  2,  4,  6,  9, 10, 15, 18], device='cuda:0')
Model output shape: torch.Size([4, 21, 128, 128])
Images shape: torch.Size([4, 3, 128, 128])
Masks shape: torch.Size([4, 128, 128])
Unique mask values: tensor([-1,  0,  2,  7,  8, 12, 13, 15, 18], device='cuda:0')
Model output shape: torch.Size([4, 21, 128, 128])
Images shape: torch.Size([4, 3, 128, 128])
Masks shape: torch.Size([4, 128, 128])
Unique mask values: tensor([-1,  0,  5,  6, 15, 16, 17], device='cuda:0')
Model output shape: torch.Size([4, 21, 128, 128])
Images shape: torch.Size([4, 3, 128, 128])
Masks shape: torch.Size([4, 1

In [None]:
import os
import torch
import torch.nn as nn
from torchvision.datasets import VOCSegmentation
from torchvision import transforms
from torch.utils.data import DataLoader
import torchvision.transforms.functional as F_transforms
from PIL import Image
import numpy as np

# 数据加载器中的标签处理
def mask_transform(mask):
    mask = mask.resize((128, 128))
    mask = F_transforms.pil_to_tensor(mask).squeeze(0)
    mask = mask.long()
    # 将超出范围的值标记为无效值
    mask[(mask < 0) | (mask >= 21)] = -1
    return mask

# 计算 IoU
def compute_iou(predicted, target, num_classes=21):
    iou_scores = []
    for cls in range(num_classes):
        pred_mask = (predicted == cls)
        target_mask = (target == cls)

        intersection = (pred_mask & target_mask).sum()
        union = (pred_mask | target_mask).sum()

        if union == 0:
            iou_scores.append(float('nan'))  # 忽略该类
        else:
            iou_scores.append(intersection.item() / union.item())
    return np.nanmean(iou_scores)

# 计算像素准确率
def compute_pixel_accuracy(predicted, target):
    valid = (target != -1)  # 忽略无效像素
    correct = (predicted[valid] == target[valid]).sum()
    total = valid.sum()
    return correct.item() / total.item()

# 推理并评估
def infer_and_evaluate(model, device, data_loader):
    model.eval()
    iou_scores = []
    pixel_accuracies = []

    with torch.no_grad():
        for images, masks in data_loader:
            images, masks = images.to(device), masks.to(device)

            # 推理
            outputs = model(images)
            predicted = torch.argmax(outputs, dim=1)

            # 计算指标
            for pred, target in zip(predicted, masks):
                iou = compute_iou(pred.cpu().numpy(), target.cpu().numpy())
                pixel_acc = compute_pixel_accuracy(pred.cpu().numpy(), target.cpu().numpy())

                iou_scores.append(iou)
                pixel_accuracies.append(pixel_acc)

    avg_iou = np.nanmean(iou_scores)
    avg_pixel_acc = np.mean(pixel_accuracies)
    return avg_iou, avg_pixel_acc

# 主函数
def main():
    # 数据加载与预处理
    transform = transforms.Compose([
        transforms.Resize((128, 128)),
        transforms.ToTensor()
    ])
    dataset = VOCSegmentation(
        root='../data',
        year='2007',
        image_set='val',  # 验证集
        download=False,
        transform=transform,
        target_transform=mask_transform
    )
    data_loader = DataLoader(dataset, batch_size=4, shuffle=False)

    # 模型定义
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = ICNet(num_classes=21).to(device)
    model.load_state_dict(torch.load("icnet_model.pth", map_location=device))
    print("Model loaded successfully.")

    # 评估模型
    avg_iou, avg_pixel_acc = infer_and_evaluate(model, device, data_loader)
    print(f"Average IoU: {avg_iou:.4f}")
    print(f"Average Pixel Accuracy: {avg_pixel_acc:.4f}")

if __name__ == "__main__":
    main()


  model.load_state_dict(torch.load("icnet_model.pth", map_location=device))


Model loaded successfully.
Average IoU: 0.2514
Average Pixel Accuracy: 0.7216


### 15.PSPNet

In [5]:
class PyramidPoolingModule(nn.Module):
    def __init__(self, in_channels, out_channels, pool_sizes):
        super(PyramidPoolingModule, self).__init__()
        self.stages = nn.ModuleList([
            nn.Sequential(
                nn.AdaptiveAvgPool2d(output_size=max(size, 2)),  # 确保池化结果至少是 2x2
                nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True)
            )
            for size in pool_sizes
        ])
        self.bottleneck = nn.Sequential(
            nn.Conv2d(in_channels + len(pool_sizes) * out_channels, out_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        size = x.size()[2:]  # 原始输入的空间大小
        pooled = [x]  # 添加原始特征
        for stage in self.stages:
            pooled.append(F.interpolate(stage(x), size=size, mode='bilinear', align_corners=False))
        out = torch.cat(pooled, dim=1)
        return self.bottleneck(out)


In [6]:
class PSPNet(nn.Module):
    def __init__(self, num_classes, pretrained_path=None):
        super(PSPNet, self).__init__()
        backbone = resnet50(pretrained=False)

        if pretrained_path:
            print(f"Loading pretrained weights from {pretrained_path}")
            state_dict = torch.load(pretrained_path)
            backbone.load_state_dict(state_dict)

        # ResNet backbone
        self.layer0 = nn.Sequential(
            backbone.conv1,
            backbone.bn1,
            backbone.relu,
            backbone.maxpool,
        )
        self.layer1 = backbone.layer1
        self.layer2 = backbone.layer2
        self.layer3 = backbone.layer3
        self.layer4 = backbone.layer4

        # Pyramid Pooling Module
        self.ppm = PyramidPoolingModule(in_channels=2048, out_channels=512, pool_sizes=[1, 2, 3, 6])

        # Final classification head
        self.final = nn.Sequential(
            nn.Conv2d(512, 512, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, num_classes, kernel_size=1)
        )

    def forward(self, x):
        size = x.size()[2:]

        x = self.layer0(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.ppm(x)
        x = self.final(x)

        return F.interpolate(x, size=size, mode='bilinear', align_corners=False)


In [None]:
import os
import torch
import torch.nn as nn
from torchvision.datasets import VOCSegmentation
from torchvision import transforms
from torch.utils.data import DataLoader
import torchvision.transforms.functional as F_transforms
from torchvision.models import resnet50
import torch.nn.functional as F
# 自定义标签处理函数
def mask_transform(mask):
    mask = mask.resize((256, 256))  # 调整标签大小与输入一致
    mask = F_transforms.pil_to_tensor(mask).squeeze(0)
    mask = mask.long()

    # 将超出范围的值标记为无效值
    mask[(mask < 0) | (mask >= 21)] = -1
    return mask

# 主函数
def main():
    # 数据加载与预处理
    transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor()
    ])
    dataset = VOCSegmentation(
        root='../data',
        year='2007',
        image_set='train',
        download=False,
        transform=transform,
        target_transform=mask_transform
    )
    data_loader = DataLoader(dataset, batch_size=4, shuffle=True)

    # 手动下载的预训练模型路径
    pretrained_path = "resnet50-0676ba61.pth"

    # 模型定义
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = PSPNet(num_classes=21, pretrained_path=pretrained_path).to(device)
    criterion = nn.CrossEntropyLoss(ignore_index=-1)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # 训练循环
    num_epochs = 5
    for epoch in range(num_epochs):  # 假设训练 5 个 epoch
        model.train()
        total_loss = 0
        for batch_idx, (images, masks) in enumerate(data_loader):
            images, masks = images.to(device), masks.to(device)

            # 打印调试信息
            print(f"Epoch [{epoch + 1}/{num_epochs}] Batch {batch_idx + 1}")
            print("Images shape:", images.shape)
            print("Masks shape:", masks.shape)
            print("Unique mask values:", torch.unique(masks))

            # 前向传播
            print("Input image shape:", images.shape)
            outputs = model(images)
            print("Model output shape:", outputs.shape)

            loss = criterion(outputs, masks)

            # 反向传播与优化
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss / len(data_loader):.4f}")

    # 保存模型
    model_save_path = "pspnet_model.pth"
    torch.save(model.state_dict(), model_save_path)
    print(f"Model training complete and saved to {model_save_path}.")

if __name__ == "__main__":
    main()


Loading pretrained weights from resnet50-0676ba61.pth


  state_dict = torch.load(pretrained_path)


Epoch [1/5] Batch 1
Images shape: torch.Size([4, 3, 256, 256])
Masks shape: torch.Size([4, 256, 256])
Unique mask values: tensor([-1,  0,  1,  2,  7, 12, 15, 17], device='cuda:0')
Input image shape: torch.Size([4, 3, 256, 256])
Model output shape: torch.Size([4, 21, 256, 256])
Epoch [1/5] Batch 2
Images shape: torch.Size([4, 3, 256, 256])
Masks shape: torch.Size([4, 256, 256])
Unique mask values: tensor([-1,  0,  8,  9, 11, 14, 15, 20], device='cuda:0')
Input image shape: torch.Size([4, 3, 256, 256])
Model output shape: torch.Size([4, 21, 256, 256])
Epoch [1/5] Batch 3
Images shape: torch.Size([4, 3, 256, 256])
Masks shape: torch.Size([4, 256, 256])
Unique mask values: tensor([-1,  0,  2,  8,  9, 12, 13, 15, 18, 20], device='cuda:0')
Input image shape: torch.Size([4, 3, 256, 256])
Model output shape: torch.Size([4, 21, 256, 256])
Epoch [1/5] Batch 4
Images shape: torch.Size([4, 3, 256, 256])
Masks shape: torch.Size([4, 256, 256])
Unique mask values: tensor([-1,  0,  6, 13, 15, 19], dev

In [None]:
import torch
import torchvision.transforms.functional as F_transforms
from torchvision.datasets import VOCSegmentation
from torchvision import transforms
from torch.utils.data import DataLoader
import numpy as np


# 自定义标签处理函数
def mask_transform(mask):
    mask = mask.resize((256, 256))  # 调整标签大小与输入一致
    mask = F_transforms.pil_to_tensor(mask).squeeze(0)
    mask = mask.long()
    mask[(mask < 0) | (mask >= 21)] = -1  # 将超出范围的值标记为无效值
    return mask


# 加载模型
def load_model(model_path, num_classes=21, device="cuda"):
    model = PSPNet(num_classes=num_classes)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.to(device)
    model.eval()  # 设置为评估模式
    return model


# 计算 IoU
def calculate_iou(preds, targets, num_classes):
    ious = []
    for cls in range(num_classes):
        pred_mask = preds == cls
        target_mask = targets == cls
        intersection = (pred_mask & target_mask).sum()
        union = (pred_mask | target_mask).sum()
        if union == 0:
            ious.append(float('nan'))  # 忽略该类别
        else:
            ious.append(intersection / union)
    return np.nanmean(ious)  # 计算所有类的平均 IoU


# 计算像素精度
def calculate_pixel_accuracy(preds, targets):
    valid = targets != -1  # 忽略无效值
    correct = (preds == targets) & valid
    accuracy = correct.sum() / valid.sum()
    return accuracy


# 推理函数
def evaluate_model(model, data_loader, device="cuda", num_classes=21):
    model.eval()
    iou_scores = []
    pixel_accuracies = []

    with torch.no_grad():
        for batch_idx, (images, masks) in enumerate(data_loader):
            images, masks = images.to(device), masks.to(device)

            # 模型前向推理
            outputs = model(images)  # [batch_size, num_classes, H, W]
            preds = torch.argmax(outputs, dim=1)  # [batch_size, H, W]

            # 计算指标
            for pred, mask in zip(preds, masks):
                iou = calculate_iou(pred.cpu().numpy(), mask.cpu().numpy(), num_classes)
                pixel_acc = calculate_pixel_accuracy(pred.cpu().numpy(), mask.cpu().numpy())
                iou_scores.append(iou)
                pixel_accuracies.append(pixel_acc)

    mean_iou = np.nanmean(iou_scores)  # 忽略 NaN
    mean_pixel_accuracy = np.mean(pixel_accuracies)
    return mean_iou, mean_pixel_accuracy


# 主推理逻辑
def main():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model_path = "pspnet_model.pth"

    # 数据加载与预处理
    transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor()
    ])
    dataset = VOCSegmentation(
        root='../data',
        year='2007',
        image_set='val',
        download=False,
        transform=transform,
        target_transform=mask_transform
    )
    data_loader = DataLoader(dataset, batch_size=4, shuffle=False)

    # 加载模型
    model = load_model(model_path, num_classes=21, device=device)

    # 数据集评估
    mean_iou, mean_pixel_accuracy = evaluate_model(model, data_loader, device=device, num_classes=21)

    # 打印性能指标
    print(f"Mean IoU: {mean_iou:.4f}")
    print(f"Mean Pixel Accuracy: {mean_pixel_accuracy:.4f}")


if __name__ == "__main__":
    main()


  model.load_state_dict(torch.load(model_path, map_location=device))


Mean IoU: 0.0695
Mean Pixel Accuracy: 0.2025


### 16.DeepLab

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.datasets import VOCSegmentation
from torchvision.transforms import functional as F_transforms
from torchvision.models.segmentation.deeplabv3 import DeepLabHead
from torchvision.models.resnet import ResNet, Bottleneck
from torch.utils.data import DataLoader
from torchvision import transforms


# 自定义标签处理函数
def mask_transform(mask):
    mask = mask.resize((256, 256))  # 调整标签大小
    mask = F_transforms.pil_to_tensor(mask).squeeze(0)
    mask = mask.long()
    mask[(mask < 0) | (mask >= 21)] = -1  # 将超出范围的值标记为无效值
    return mask


# 自定义 ResNet50 加载函数
def load_custom_resnet50(weights_path=None, device="cuda"):
    # 构建 ResNet50 Backbone
    model = ResNet(
        block=Bottleneck,
        layers=[3, 4, 6, 3],  # ResNet-50 配置
        replace_stride_with_dilation=[False, True, True]
    )
    if weights_path:
        print(f"Loading ResNet50 backbone weights from {weights_path}")
        state_dict = torch.load(weights_path, map_location=device)
        model.load_state_dict(state_dict, strict=False)

    # 删除最后的全连接层及分类相关层，保留到最后的卷积层
    model = nn.Sequential(*list(model.children())[:-2])
    return model.to(device)


# 自定义 DeepLabV3 模型加载函数
def load_deeplab_model(num_classes=21, deeplab_pretrained_path=None, resnet_pretrained_path=None, device="cuda"):
    # 加载自定义的 ResNet50 Backbone
    backbone = load_custom_resnet50(weights_path=resnet_pretrained_path, device=device)

    # 构建 DeepLab 模型
    class DeepLabV3(nn.Module):
        def __init__(self, backbone, num_classes):
            super(DeepLabV3, self).__init__()
            self.backbone = backbone
            self.classifier = DeepLabHead(2048, num_classes)  # 2048是 ResNet 的输出通道数

        def forward(self, x):
            features = self.backbone(x)  # 获取特征图
            return {"out": self.classifier(features)}

    model = DeepLabV3(backbone, num_classes)

    # 加载 DeepLab 的预训练权重
    if deeplab_pretrained_path:
        print(f"Loading DeepLab pretrained weights from {deeplab_pretrained_path}")
        state_dict = torch.load(deeplab_pretrained_path, map_location=device)
        model.load_state_dict(state_dict, strict=False)

    model.to(device)
    return model


# 训练函数
def train_model(model, train_loader, criterion, optimizer, num_epochs, device="cuda"):
    model.train()
    for epoch in range(num_epochs):
        total_loss = 0
        for batch_idx, (images, masks) in enumerate(train_loader):
            images, masks = images.to(device), masks.to(device)

            # 前向传播调试
            features = model.backbone(images)
            print("Backbone Output Shape:", features.shape)

            outputs = model.classifier(features)
            print("Classifier Output Shape:", outputs.shape)
            # 前向传播
            outputs = model(images)['out']  # DeepLab 返回一个字典
            outputs = nn.functional.interpolate(outputs, size=masks.shape[-2:], mode='bilinear', align_corners=False)  # 上采样

            # 计算损失
            loss = criterion(outputs, masks)

            # 反向传播与优化
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {total_loss / len(train_loader):.4f}")


# 主训练逻辑
def main():
    # 配置参数
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    num_classes = 21
    num_epochs = 5
    batch_size = 4
    learning_rate = 0.001

    # 数据预处理与加载
    transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor()
    ])
    dataset = VOCSegmentation(
        root='../data',
        year='2007',
        image_set='train',
        download=False,
        transform=transform,
        target_transform=mask_transform
    )
    train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=True)

    # 手动下载的预训练权重路径
    resnet_pretrained_path = "resnet50-0676ba61.pth"
    deeplab_pretrained_path = "deeplabv3_resnet50_coco-cd0a2569.pth"

    # 加载模型
    model = load_deeplab_model(
        num_classes=num_classes,
        deeplab_pretrained_path=deeplab_pretrained_path,
        resnet_pretrained_path=resnet_pretrained_path,
        device=device
    )

    # 定义损失函数和优化器
    criterion = nn.CrossEntropyLoss(ignore_index=-1)  # 忽略无效标签
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # 训练模型
    train_model(model, train_loader, criterion, optimizer, num_epochs, device=device)

    # 保存模型
    torch.save(model.state_dict(), "deeplab_model.pth")
    print("Model training complete and saved to deeplab_model.pth")


if __name__ == "__main__":
    main()


Loading ResNet50 backbone weights from resnet50-0676ba61.pth


  state_dict = torch.load(weights_path, map_location=device)


Loading DeepLab pretrained weights from deeplabv3_resnet50_coco-cd0a2569.pth
Backbone Output Shape: torch.Size([4, 2048, 32, 32])
Classifier Output Shape: torch.Size([4, 21, 32, 32])


  state_dict = torch.load(deeplab_pretrained_path, map_location=device)


Backbone Output Shape: torch.Size([4, 2048, 32, 32])
Classifier Output Shape: torch.Size([4, 21, 32, 32])
Backbone Output Shape: torch.Size([4, 2048, 32, 32])
Classifier Output Shape: torch.Size([4, 21, 32, 32])
Backbone Output Shape: torch.Size([4, 2048, 32, 32])
Classifier Output Shape: torch.Size([4, 21, 32, 32])
Backbone Output Shape: torch.Size([4, 2048, 32, 32])
Classifier Output Shape: torch.Size([4, 21, 32, 32])
Backbone Output Shape: torch.Size([4, 2048, 32, 32])
Classifier Output Shape: torch.Size([4, 21, 32, 32])
Backbone Output Shape: torch.Size([4, 2048, 32, 32])
Classifier Output Shape: torch.Size([4, 21, 32, 32])
Backbone Output Shape: torch.Size([4, 2048, 32, 32])
Classifier Output Shape: torch.Size([4, 21, 32, 32])
Backbone Output Shape: torch.Size([4, 2048, 32, 32])
Classifier Output Shape: torch.Size([4, 21, 32, 32])
Backbone Output Shape: torch.Size([4, 2048, 32, 32])
Classifier Output Shape: torch.Size([4, 21, 32, 32])
Backbone Output Shape: torch.Size([4, 2048, 32

In [None]:
import torch
import torch.nn as nn
from torchvision.transforms import functional as F_transforms
from torchvision import transforms
from PIL import Image
import numpy as np


# 自定义标签处理函数
def mask_transform(mask):
    mask = mask.resize((256, 256))  # 调整标签大小
    mask = F_transforms.pil_to_tensor(mask).squeeze(0)
    mask = mask.long()
    mask[(mask < 0) | (mask >= 21)] = -1  # 将超出范围的值标记为无效值
    return mask


# 加载模型函数
def load_deeplab_model(num_classes=21, deeplab_pretrained_path=None, resnet_pretrained_path=None, device="cuda"):
    # 构建 ResNet50 Backbone
    model = ResNet(
        block=Bottleneck,
        layers=[3, 4, 6, 3],  # ResNet-50 配置
        replace_stride_with_dilation=[False, True, True]
    )
    if resnet_pretrained_path:
        print(f"Loading ResNet50 backbone weights from {resnet_pretrained_path}")
        state_dict = torch.load(resnet_pretrained_path, map_location=device)
        model.load_state_dict(state_dict, strict=False)

    # 删除最后的全连接层及分类相关层，保留到最后的卷积层
    backbone = nn.Sequential(*list(model.children())[:-2])

    # 构建 DeepLab 模型
    class DeepLabV3(nn.Module):
        def __init__(self, backbone, num_classes):
            super(DeepLabV3, self).__init__()
            self.backbone = backbone
            self.classifier = DeepLabHead(2048, num_classes)

        def forward(self, x):
            features = self.backbone(x)
            return {"out": self.classifier(features)}

    model = DeepLabV3(backbone, num_classes)

    # 加载 DeepLab 的预训练权重
    if deeplab_pretrained_path:
        print(f"Loading DeepLab pretrained weights from {deeplab_pretrained_path}")
        state_dict = torch.load(deeplab_pretrained_path, map_location=device)
        model.load_state_dict(state_dict)

    model.to(device)
    model.eval()
    return model


# 推理函数并计算指标
def evaluate(model, data_loader, device="cuda"):
    intersection = torch.zeros(21).to(device)  # 交集
    union = torch.zeros(21).to(device)         # 并集
    total_correct = 0                          # 总正确像素
    total_pixels = 0                           # 总像素

    with torch.no_grad():
        for images, masks in data_loader:
            images, masks = images.to(device), masks.to(device)

            # 模型推理
            outputs = model(images)['out']
            outputs = nn.functional.interpolate(outputs, size=masks.shape[-2:], mode='bilinear', align_corners=False)  # 调整尺寸
            predictions = torch.argmax(outputs, dim=1)  # 获取预测类别

            # 计算指标
            for cls in range(21):
                pred_cls = predictions == cls
                true_cls = masks == cls
                intersection[cls] += torch.sum(pred_cls & true_cls)
                union[cls] += torch.sum(pred_cls | true_cls)

            total_correct += torch.sum(predictions == masks).item()
            total_pixels += masks.numel()

    # 计算 mIoU 和像素准确率
    iou = intersection / (union + 1e-6)
    miou = torch.mean(iou).item()
    pixel_accuracy = total_correct / total_pixels
    return miou, pixel_accuracy



# 主推理逻辑
def main_inference():
    # 配置参数
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    num_classes = 21
    batch_size = 4
    resnet_pretrained_path = "resnet50-0676ba61.pth"
    deeplab_pretrained_path = "deeplab_model.pth"  # 已训练的模型权重

    # 数据预处理与加载
    transform = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor()
    ])
    dataset = VOCSegmentation(
        root='../data',
        year='2007',
        image_set='val',  # 验证集
        download=False,
        transform=transform,
        target_transform=mask_transform
    )
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

    # 加载模型
    model = load_deeplab_model(
        num_classes=num_classes,
        deeplab_pretrained_path=deeplab_pretrained_path,
        resnet_pretrained_path=resnet_pretrained_path,
        device=device
    )

    # 评估
    miou, pixel_accuracy = evaluate(model, data_loader, device=device)
    print(f"Mean IoU: {miou:.4f}")
    print(f"Pixel Accuracy: {pixel_accuracy:.4f}")


if __name__ == "__main__":
    main_inference()


Loading ResNet50 backbone weights from resnet50-0676ba61.pth


  state_dict = torch.load(resnet_pretrained_path, map_location=device)


Loading DeepLab pretrained weights from deeplab_model.pth


  state_dict = torch.load(deeplab_pretrained_path, map_location=device)


Mean IoU: 0.0374
Pixel Accuracy: 0.7022


### 17.Pixel-RNN

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from tqdm import tqdm

# 定义 PixelRNN 模型
class PixelRNN(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_classes):
        super(PixelRNN, self).__init__()
        self.hidden_dim = hidden_dim
        self.embedding = nn.Embedding(num_classes, input_dim)
        self.rnn = nn.LSTM(input_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        # 嵌入输入（独热编码 -> 嵌入）
        x = self.embedding(x.long())  # 输入为整数类型
        x = x.view(x.size(0), -1, x.size(-1))  # 展平图像
        # 经过 RNN
        h0 = torch.zeros(1, x.size(0), self.hidden_dim).to(x.device)
        c0 = torch.zeros(1, x.size(0), self.hidden_dim).to(x.device)
        out, _ = self.rnn(x, (h0, c0))
        # 全连接输出
        out = self.fc(out)
        return out.view(-1, 28, 28, 256)  # 输出与输入图像大小一致

# 数据加载与预处理
def load_data(batch_size):
    transform = transforms.Compose([
        transforms.ToTensor(),  # 转换为张量
    ])

    train_dataset = datasets.MNIST(root='../data', train=True, transform=transform, download=False)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    return train_loader

# 训练函数
def train_model(model, train_loader, criterion, optimizer, device, num_epochs):
    model.train()
    for epoch in range(num_epochs):
        total_loss = 0
        for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
            images = images.to(device)
            # 将输入值映射到 [0, 255] 的整数值范围
            targets = (images * 255).clamp(0, 255).long()
            targets = targets.squeeze(1)  # 去掉单通道维度
            
            # 检查目标值范围
            assert targets.min() >= 0 and targets.max() <= 255, "Target values out of range!"

            # 前向传播
            outputs = model(targets)
            outputs = outputs.permute(0, 3, 1, 2)  # 调整维度为 (batch_size, num_classes, height, width)

            # 计算损失
            loss = criterion(outputs, targets)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss / len(train_loader):.4f}")

# 主函数
def main():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    batch_size = 64
    num_epochs = 5
    learning_rate = 0.001
    input_dim = 16  # 嵌入维度
    hidden_dim = 64  # RNN 隐藏层维度
    num_classes = 256  # 灰度值范围

    # 加载数据
    train_loader = load_data(batch_size)

    # 初始化模型、损失函数、优化器
    model = PixelRNN(input_dim, hidden_dim, num_classes).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # 训练模型
    train_model(model, train_loader, criterion, optimizer, device, num_epochs)

    # 保存模型
    torch.save(model.state_dict(), "pixel_rnn_mnist.pth")
    print("Model training complete and saved to pixel_rnn_mnist.pth")

if __name__ == "__main__":
    main()


Epoch 1/5: 100%|██████████| 938/938 [00:13<00:00, 68.36it/s]


Epoch [1/5], Loss: 0.7294


Epoch 2/5: 100%|██████████| 938/938 [00:13<00:00, 70.33it/s]


Epoch [2/5], Loss: 0.0423


Epoch 3/5: 100%|██████████| 938/938 [00:13<00:00, 70.77it/s]


Epoch [3/5], Loss: 0.0055


Epoch 4/5: 100%|██████████| 938/938 [00:13<00:00, 69.56it/s]


Epoch [4/5], Loss: 0.0021


Epoch 5/5: 100%|██████████| 938/938 [00:13<00:00, 68.96it/s]

Epoch [5/5], Loss: 0.0010
Model training complete and saved to pixel_rnn_mnist.pth





In [2]:
import torch
from torchvision import datasets, transforms
from tqdm import tqdm

# 定义计算像素准确率的函数
def calculate_pixel_accuracy(predictions, targets):
    """
    计算像素准确率（Pixel Accuracy）。
    :param predictions: 模型输出的预测值 (B, H, W)
    :param targets: 实际目标值 (B, H, W)
    :return: 像素准确率
    """
    correct = (predictions == targets).sum().item()
    total = targets.numel()  # 总像素数
    return correct / total

# 定义推理函数
def infer_model(model, test_loader, device):
    model.eval()
    total_pixel_accuracy = 0

    with torch.no_grad():
        for images, _ in tqdm(test_loader, desc="Inference"):
            images = images.to(device)
            targets = (images * 255).clamp(0, 255).long()
            targets = targets.squeeze(1)  # 去掉单通道维度
            
            # 前向传播
            outputs = model(targets)
            outputs = outputs.permute(0, 3, 1, 2)  # 调整维度为 (batch_size, num_classes, height, width)

            # 获取每个像素的预测类别
            predictions = torch.argmax(outputs, dim=1)

            # 计算像素准确率
            pixel_accuracy = calculate_pixel_accuracy(predictions, targets)
            total_pixel_accuracy += pixel_accuracy

    # 返回平均像素准确率
    avg_pixel_accuracy = total_pixel_accuracy / len(test_loader)
    return avg_pixel_accuracy

# 主推理逻辑
def main_inference():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    batch_size = 64
    input_dim = 16  # 嵌入维度
    hidden_dim = 64  # RNN 隐藏层维度
    num_classes = 256  # 灰度值范围

    # 数据加载
    transform = transforms.Compose([
        transforms.ToTensor(),  # 转换为张量
    ])
    test_dataset = datasets.MNIST(root='../data', train=False, transform=transform, download=False)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # 加载模型
    model = PixelRNN(input_dim, hidden_dim, num_classes).to(device)
    model.load_state_dict(torch.load("pixel_rnn_mnist.pth", map_location=device))

    # 推理并计算指标
    avg_pixel_accuracy = infer_model(model, test_loader, device)
    print(f"Average Pixel Accuracy: {avg_pixel_accuracy:.4f}")

if __name__ == "__main__":
    main_inference()


  model.load_state_dict(torch.load("pixel_rnn_mnist.pth", map_location=device))
Inference: 100%|██████████| 157/157 [00:01<00:00, 145.86it/s]

Average Pixel Accuracy: 1.0000





### 18.LSTM

In [5]:
python -m pip install --upgrade pip


SyntaxError: invalid syntax (3439513114.py, line 1)

In [1]:
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
from sklearn.model_selection import train_test_split

# 自定义数据集类
class IMDBDataset(Dataset):
    def __init__(self, data, tokenizer, vocab, max_length=256):
        self.data = data
        self.tokenizer = tokenizer
        self.vocab = vocab
        self.max_length = max_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        review, sentiment = self.data[idx]
        tokens = self.tokenizer(review)[:self.max_length]  # 分词并截断
        indices = [self.vocab.get(token, self.vocab["<unk>"]) for token in tokens]  # 转为索引
        indices_tensor = torch.tensor(indices, dtype=torch.long)
        label = 0 if sentiment == "negative" else 1  # 负面=0，正面=1
        return indices_tensor, torch.tensor(label, dtype=torch.long)

# 加载 CSV 数据
def load_imdb_from_csv(csv_path, test_size=0.2, max_length=256, batch_size=32):
    # 加载数据
    df = pd.read_csv(csv_path)
    reviews = df["review"].tolist()
    sentiments = df["sentiment"].tolist()

    # 划分训练集和测试集
    train_data, test_data = train_test_split(
        list(zip(reviews, sentiments)), test_size=test_size, random_state=42
    )

    # 定义分词器
    tokenizer = lambda x: x.lower().split()

    # 构建词汇表
    vocab = {"<unk>": 0}  # 未知词映射到索引 0
    for review, _ in train_data:
        for token in tokenizer(review):
            if token not in vocab:
                vocab[token] = len(vocab)

    # 构建 Dataset
    train_dataset = IMDBDataset(train_data, tokenizer, vocab, max_length)
    test_dataset = IMDBDataset(test_data, tokenizer, vocab, max_length)

    # 构建 DataLoader
    def collate_batch(batch):
        texts = [item[0] for item in batch]
        labels = torch.tensor([item[1] for item in batch], dtype=torch.long)
        texts = pad_sequence(texts, batch_first=True, padding_value=vocab["<unk>"])  # 填充
        return texts, labels

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_batch)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_batch)

    return train_loader, test_loader, vocab

# 测试数据加载
if __name__ == "__main__":
    csv_path = "./data/IMDB/IMDB_Dataset.csv"  # 确保文件路径正确
    train_loader, test_loader, vocab = load_imdb_from_csv(csv_path, batch_size=32)

    print(f"Vocab size: {len(vocab)}")
    for texts, labels in train_loader:
        print("Texts shape:", texts.shape)
        print("Labels shape:", labels.shape)
        break



A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.0.2 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/home/fushaomin/miniconda3/lib/python3.12/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/home/fushaomin/miniconda3/lib/python3.12/site-packages/traitlets/config/application.py", line 1075, in launch_instance
    app.start()
  File "/home/fushaomin/miniconda3/lib/python3.12/site-packages/ipykernel/kernelapp.py", line 739, in start
    self.io_loop.start()
  File 

ImportError: 
A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.0.2 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.



ImportError: numpy.core.multiarray failed to import

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchtext.datasets import IMDB
from torchtext.data import Field, BucketIterator
from sklearn.metrics import accuracy_score

# 1. 数据预处理
TEXT = Field(sequential=True, tokenize='spacy', tokenizer_language='en_core_web_sm', include_lengths=True)
LABEL = Field(sequential=False, use_vocab=False, is_target=True)

# 加载IMDB数据集
train_data, test_data = IMDB.splits(TEXT, LABEL)

# 构建词汇表
TEXT.build_vocab(train_data, max_size=25000, vectors='glove.6B.100d', unk_init=torch.Tensor.normal_)

# 创建数据迭代器
BATCH_SIZE = 64
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

train_iterator, test_iterator = BucketIterator.splits(
    (train_data, test_data), 
    batch_size=BATCH_SIZE,
    sort_within_batch=True,
    device=device
)

# 2. 定义LSTM模型
class SentimentLSTM(nn.Module):
    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim, n_layers, dropout):
        super(SentimentLSTM, self).__init__()
        self.embedding = nn.Embedding(input_dim, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=n_layers, dropout=dropout, bidirectional=True)
        self.fc = nn.Linear(hidden_dim * 2, output_dim)  # 双向LSTM
        self.dropout = nn.Dropout(dropout)

    def forward(self, text, text_lengths):
        embedded = self.embedding(text)
        packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, text_lengths)
        packed_output, (hidden, cell) = self.lstm(packed_embedded)
        output, _ = nn.utils.rnn.pad_packed_sequence(packed_output)
        hidden = self.dropout(torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1))
        return self.fc(hidden)

# 3. 初始化模型
input_dim = len(TEXT.vocab)
embedding_dim = 100
hidden_dim = 256
output_dim = 1  # 二分类
n_layers = 2
dropout = 0.5

model = SentimentLSTM(input_dim, embedding_dim, hidden_dim, output_dim, n_layers, dropout).to(device)

# 使用GloVe词向量初始化embedding层
model.embedding.weight.data.copy_(TEXT.vocab.vectors)

# 4. 训练模型
optimizer = optim.Adam(model.parameters())
criterion = nn.BCEWithLogitsLoss()

def train(model, iterator, optimizer, criterion):
    model.train()
    epoch_loss = 0
    epoch_acc = 0
    
    for batch in iterator:
        text, text_lengths = batch.text
        labels = batch.label.float()

        optimizer.zero_grad()
        predictions = model(text, text_lengths).squeeze(1)
        
        loss = criterion(predictions, labels)
        acc = accuracy_score(labels.cpu().numpy(), predictions.cpu().round().detach().numpy())
        
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        epoch_acc += acc
    
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

# 5. 训练过程
num_epochs = 5
for epoch in range(num_epochs):
    train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
    print(f"Epoch {epoch+1}: Train Loss = {train_loss:.4f}, Train Accuracy = {train_acc:.4f}")

# 6. 保存模型
torch.save(model.state_dict(), 'sentiment_lstm.pth')




OSError: /home/fushaomin/miniconda3/lib/python3.12/site-packages/torchtext/lib/libtorchtext.so: undefined symbol: _ZN5torch3jit17parseSchemaOrNameERKSs

### 19.GNMT

In [6]:
pip install spacy

Collecting spacy
[0m  Downloading spacy-3.8.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (27 kB)
Collecting spacy-legacy<3.1.0,>=3.0.11 (from spacy)
  Downloading spacy_legacy-3.0.12-py2.py3-none-any.whl.metadata (2.8 kB)
Collecting spacy-loggers<2.0.0,>=1.0.0 (from spacy)
  Downloading spacy_loggers-1.0.5-py3-none-any.whl.metadata (23 kB)
Collecting murmurhash<1.1.0,>=0.28.0 (from spacy)
  Downloading murmurhash-1.0.11-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.0 kB)
Collecting cymem<2.1.0,>=2.0.2 (from spacy)
  Downloading cymem-2.0.10-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.4 kB)
Collecting preshed<3.1.0,>=3.0.2 (from spacy)
  Downloading preshed-3.0.9-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.2 kB)
Collecting thinc<8.4.0,>=8.3.0 (from spacy)
  Downloading thinc-8.3.2-cp312-cp312-manylinux_2_17

In [3]:
pip install sacremoses

Collecting sacremoses
[0m  Downloading sacremoses-0.1.1-py3-none-any.whl.metadata (8.3 kB)
Collecting regex (from sacremoses)
  Downloading regex-2024.11.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (40 kB)
[31mERROR: Exception:
Traceback (most recent call last):
  File "/home/fushaomin/miniconda3/lib/python3.12/site-packages/pip/_vendor/urllib3/response.py", line 438, in _error_catcher
    yield
  File "/home/fushaomin/miniconda3/lib/python3.12/site-packages/pip/_vendor/urllib3/response.py", line 561, in read
    data = self._fp_read(amt) if not fp_closed else b""
           ^^^^^^^^^^^^^^^^^^
  File "/home/fushaomin/miniconda3/lib/python3.12/site-packages/pip/_vendor/urllib3/response.py", line 527, in _fp_read
    return self._fp.read(amt) if amt is not None else self._fp.read()
           ^^^^^^^^^^^^^^^^^^
  File "/home/fushaomin/miniconda3/lib/python3.12/site-packages/pip/_vendor/cachecontrol/filewrapper.py", line 98, in read
    data: bytes = self.__fp.

In [None]:
pip install sacremoses

In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from sacremoses import MosesTokenizer
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader, Dataset

# 数据配置
DATA_DIR = "data/wmt14"  # 替换为你的数据路径
TRAIN_SRC_FILE = f"{DATA_DIR}/train.en"
TRAIN_TGT_FILE = f"{DATA_DIR}/train.de"
VALIDATION_SRC_FILE = f"{DATA_DIR}/val.en"
VALIDATION_TGT_FILE = f"{DATA_DIR}/val.de"

# 加载数据文件
def load_data(src_file, tgt_file):
    with open(src_file, 'r', encoding='utf-8') as src_f, open(tgt_file, 'r', encoding='utf-8') as tgt_f:
        src_sentences = src_f.readlines()
        tgt_sentences = tgt_f.readlines()
    return src_sentences, tgt_sentences

train_src, train_tgt = load_data(TRAIN_SRC_FILE, TRAIN_TGT_FILE)
val_src, val_tgt = load_data(VALIDATION_SRC_FILE, VALIDATION_TGT_FILE)

# 分词器
tokenizer = MosesTokenizer(lang="en")

# 构建词汇表
def build_vocab(sentences, tokenizer, specials=["<unk>", "<pad>", "<bos>", "<eos>"]):
    vocab = {special: idx for idx, special in enumerate(specials)}
    idx = len(specials)
    for sentence in sentences:
        tokens = tokenizer(sentence)
        for token in tokens:
            if token not in vocab:
                vocab[token] = idx
                idx += 1
    return vocab

src_vocab = build_vocab(train_src, tokenizer.tokenize)
tgt_vocab = build_vocab(train_tgt, tokenizer.tokenize)

# 将句子转换为张量
def sentence_to_tensor(sentence, vocab, tokenizer):
    tokens = tokenizer(sentence)
    return torch.tensor([vocab["<bos>"]] + [vocab.get(token, vocab["<unk>"]) for token in tokens] + [vocab["<eos>"]])

# 自定义数据集
class TranslationDataset(Dataset):
    def __init__(self, src_sentences, tgt_sentences, src_vocab, tgt_vocab, tokenizer):
        self.src_sentences = src_sentences
        self.tgt_sentences = tgt_sentences
        self.src_vocab = src_vocab
        self.tgt_vocab = tgt_vocab
        self.tokenizer = tokenizer

    def __len__(self):
        return len(self.src_sentences)

    def __getitem__(self, idx):
        src_tensor = sentence_to_tensor(self.src_sentences[idx], self.src_vocab, self.tokenizer)
        tgt_tensor = sentence_to_tensor(self.tgt_sentences[idx], self.tgt_vocab, self.tokenizer)
        return src_tensor, tgt_tensor

# 数据加载器
def collate_fn(batch):
    src_batch, tgt_batch = zip(*batch)
    src_batch = pad_sequence(src_batch, batch_first=True, padding_value=src_vocab["<pad>"])
    tgt_batch = pad_sequence(tgt_batch, batch_first=True, padding_value=tgt_vocab["<pad>"])
    return src_batch, tgt_batch

train_dataset = TranslationDataset(train_src, train_tgt, src_vocab, tgt_vocab, tokenizer.tokenize)
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=collate_fn)

# GNMT 模型定义
class GNMT(nn.Module):
    def __init__(self, src_vocab_size, tgt_vocab_size, embed_size, hidden_size, num_layers, dropout=0.1):
        super(GNMT, self).__init__()
        self.encoder = nn.Embedding(src_vocab_size, embed_size)
        self.decoder = nn.Embedding(tgt_vocab_size, embed_size)
        self.lstm = nn.LSTM(embed_size, hidden_size, num_layers, dropout=dropout, batch_first=True)
        self.fc = nn.Linear(hidden_size, tgt_vocab_size)

    def forward(self, src, tgt):
        src_embed = self.encoder(src)
        tgt_embed = self.decoder(tgt)
        outputs, _ = self.lstm(torch.cat([src_embed, tgt_embed], dim=1))
        logits = self.fc(outputs)
        return logits

# 训练配置
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
embed_size = 256
hidden_size = 512
num_layers = 2
dropout = 0.1
model = GNMT(len(src_vocab), len(tgt_vocab), embed_size, hidden_size, num_layers, dropout).to(device)
criterion = nn.CrossEntropyLoss(ignore_index=src_vocab["<pad>"])
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 训练模型
def train_model(model, train_dataloader, criterion, optimizer, num_epochs=10):
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        for src, tgt in train_dataloader:
            src, tgt = src.to(device), tgt.to(device)
            optimizer.zero_grad()
            outputs = model(src[:, :-1], tgt[:, :-1])
            loss = criterion(outputs.reshape(-1, outputs.size(-1)), tgt[:, 1:].reshape(-1))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss:.4f}")

train_model(model, train_dataloader, criterion, optimizer)

# 保存模型
torch.save(model.state_dict(), "gnmt_model.pth")



A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.0.2 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/home/fushaomin/miniconda3/lib/python3.12/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/home/fushaomin/miniconda3/lib/python3.12/site-packages/traitlets/config/application.py", line 1075, in launch_instance
    app.start()
  File "/home/fushaomin/miniconda3/lib/python3.12/site-packages/ipykernel/kernelapp.py", line 739, in start
    self.io_loop.start()
  File 

ImportError: 
A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.0.2 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.



ImportError: numpy.core.multiarray failed to import

In [None]:
import torch
import torch.nn as nn
from sacremoses import MosesTokenizer
from torchtext.vocab import Vocab

# GNMT 模型定义（与训练时保持一致）
class GNMT(nn.Module):
    def __init__(self, src_vocab_size, tgt_vocab_size, embed_size, hidden_size, num_layers, dropout=0.1):
        super(GNMT, self).__init__()
        self.encoder = nn.Embedding(src_vocab_size, embed_size)
        self.decoder = nn.Embedding(tgt_vocab_size, embed_size)
        self.lstm = nn.LSTM(embed_size, hidden_size, num_layers, dropout=dropout, batch_first=True)
        self.fc = nn.Linear(hidden_size, tgt_vocab_size)

    def forward(self, src, tgt):
        src_embed = self.encoder(src)
        tgt_embed = self.decoder(tgt)
        outputs, _ = self.lstm(torch.cat([src_embed, tgt_embed], dim=1))
        logits = self.fc(outputs)
        return logits

# 加载词汇表
src_vocab = torch.load("src_vocab.pth")  # 替换为训练时保存的 src_vocab
tgt_vocab = torch.load("tgt_vocab.pth")  # 替换为训练时保存的 tgt_vocab

# 加载模型
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
embed_size = 256
hidden_size = 512
num_layers = 2
dropout = 0.1
model = GNMT(len(src_vocab), len(tgt_vocab), embed_size, hidden_size, num_layers, dropout).to(device)
model.load_state_dict(torch.load("gnmt_model.pth"))
model.eval()

# 翻译函数
def translate(model, sentence, src_vocab, tgt_vocab, tokenizer, max_len=50):
    src_tensor = torch.tensor([src_vocab["<bos>"]] + [src_vocab[token] for token in tokenizer(sentence)] + [src_vocab["<eos>"]]).unsqueeze(0).to(device)
    tgt_tensor = torch.tensor([tgt_vocab["<bos>"]]).unsqueeze(0).to(device)
    
    for _ in range(max_len):
        with torch.no_grad():
            output = model(src_tensor, tgt_tensor)
        next_token = output.argmax(2)[:, -1]
        tgt_tensor = torch.cat([tgt_tensor, next_token.unsqueeze(0)], dim=1)
        if next_token.item() == tgt_vocab["<eos>"]:
            break
    
    return " ".join([tgt_vocab.itos[idx] for idx in tgt_tensor.squeeze(0).tolist()[1:-1]])

# 示例推理
tokenizer = MosesTokenizer(lang="en")
sentence = "This is a test sentence."
translation = translate(model, sentence, src_vocab, tgt_vocab, tokenizer)
print("Translated Sentence:", translation)
