resnet18 dml

In [1]:

# Tested OK in the conda `base` environment.
# Uses the torchvision ResNet-18 architecture (no pretrained weights are loaded).
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
from torchvision.models import resnet18
import time
import os

# ROCm/HSA override: report the GPU as gfx version 11.0.0.
# NOTE(review): presumably needed for an AMD GPU that ROCm does not officially
# support -- confirm against the target hardware.
os.environ["HSA_OVERRIDE_GFX_VERSION"] = "11.0.0"

# Use the first GPU when one is visible; otherwise fall back to the CPU so the
# notebook still runs (slowly) instead of failing at the first `.to(device)`.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


# Data preprocessing / augmentation pipeline applied to each image:
#   1. random horizontal flip
#   2. random 32x32 crop after padding 4 pixels on each side
#   3. conversion to a tensor
#   4. per-channel normalization with mean 0.5 and std 0.5
transform = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, padding=4),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

In [3]:
# Load the CIFAR-10 dataset (downloaded into ./cifar10/data if missing).
batch_size = 128
train_dataset = torchvision.datasets.CIFAR10(root='./cifar10/data', train=True, download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)

# Evaluation must be deterministic: the random flip / random crop used for
# training augmentation would make the reported test accuracy vary from run to
# run and measure performance on distorted images. Only the tensor conversion
# and the same normalization as the training pipeline are applied here.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])
test_dataset = torchvision.datasets.CIFAR10(root='./cifar10/data', train=False, download=True, transform=test_transform)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

Files already downloaded and verified
Files already downloaded and verified


In [4]:

# Define the model: ResNet-18 with a 10-way classifier head for CIFAR-10.
# No weights are requested, so the network is randomly initialized. The
# `pretrained=` keyword is omitted on purpose: it is deprecated in newer
# torchvision releases (replaced by `weights=`), and leaving both out gives the
# same un-pretrained model on old and new versions without a warning.
model = resnet18(num_classes=10)
model = model.to(device)

# Loss function and optimizer (SGD with momentum and L2 weight decay).
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)

# Learning-rate scheduler: halve the learning rate every 10 scheduler steps.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

# Training routine.
def train_model(model, train_loader, criterion, optimizer, epochs, scheduler=None, device=None):
    """Train ``model`` for ``epochs`` passes over ``train_loader``.

    Prints the loss of every 100th batch and, after each epoch, the elapsed
    time and the mean batch loss.

    Args:
        model: The ``nn.Module`` to optimize; it is switched to training mode.
        train_loader: Sized iterable yielding ``(inputs, labels)`` batches.
        criterion: Callable mapping ``(outputs, labels)`` to a scalar loss.
        optimizer: Optimizer that updates ``model``'s parameters.
        epochs: Number of passes over ``train_loader``.
        scheduler: Optional learning-rate scheduler stepped once per epoch.
            When omitted, a module-level object named ``scheduler`` is used if
            one exists, which keeps existing five-argument calls working.
        device: Optional device the batches are moved to. When omitted, the
            device of the model's first parameter is used (CPU if the model
            has no parameters).

    Returns:
        None.
    """
    if device is None:
        # Follow the model instead of relying on a notebook-level global.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")
    if scheduler is None:
        # Backward compatibility: earlier versions always stepped the
        # notebook-level `scheduler`.
        scheduler = globals().get("scheduler")

    num_batches = len(train_loader)

    model.train()
    for epoch in range(epochs):
        start_time = time.time()
        running_loss = 0.0
        for i, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(device), labels.to(device)

            # Forward pass, backward pass, parameter update.
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Read the scalar once; each .item() call forces a device sync.
            batch_loss = loss.item()
            running_loss += batch_loss

            if i % 100 == 0:  # log every 100th batch
                print(f"Epoch [{epoch + 1}/{epochs}], Step [{i + 1}/{num_batches}], Loss: {batch_loss:.4f}")

        # Per-epoch summary; an empty loader reports NaN instead of raising
        # ZeroDivisionError.
        epoch_time = time.time() - start_time
        average_loss = running_loss / num_batches if num_batches else float("nan")
        print(f"Epoch {epoch + 1} finished. Time: {epoch_time:.2f}s, Average Loss: {average_loss:.4f}")

        if scheduler is not None:
            scheduler.step()



In [5]:
# 测试函数
def test_model(model, test_loader, criterion):
    model.eval()
    correct = 0
    total = 0
    test_loss = 0.0

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            test_loss += loss.item()

            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f"Test Accuracy: {100 * correct / total:.2f}%")
    print(f"Test Loss: {test_loss / len(test_loader):.4f}")



In [6]:
# Run training, then evaluate on the test set.
num_epochs = 20
print("Starting Training...")
train_model(model, train_loader, criterion, optimizer, num_epochs)
print("Training Complete. Starting Testing...")
test_model(model, test_loader, criterion)

# Save the model weights. The target directory is created first so that a
# missing ./cifar10/model/ folder does not throw away the whole training run.
weights_path = "./cifar10/model/resnet18_cifar10.pth"
os.makedirs(os.path.dirname(weights_path), exist_ok=True)
torch.save(model.state_dict(), weights_path)
print("Model weights saved to resnet18_cifar10.pth")


Starting Training...


  return F.linear(input, self.weight, self.bias)


Epoch [1/20], Step [1/391], Loss: 2.5054
Epoch [1/20], Step [101/391], Loss: 2.3764
Epoch [1/20], Step [201/391], Loss: 2.0655
Epoch [1/20], Step [301/391], Loss: 1.5872
Epoch 1 finished. Time: 68.69s, Average Loss: 1.9948
Epoch [2/20], Step [1/391], Loss: 1.5906
Epoch [2/20], Step [101/391], Loss: 1.4809
Epoch [2/20], Step [201/391], Loss: 1.4374
Epoch [2/20], Step [301/391], Loss: 1.4461
Epoch 2 finished. Time: 16.18s, Average Loss: 1.4538
Epoch [3/20], Step [1/391], Loss: 1.3392
Epoch [3/20], Step [101/391], Loss: 1.2811
Epoch [3/20], Step [201/391], Loss: 1.3296
Epoch [3/20], Step [301/391], Loss: 1.2623
Epoch 3 finished. Time: 15.84s, Average Loss: 1.2808
Epoch [4/20], Step [1/391], Loss: 1.2965
Epoch [4/20], Step [101/391], Loss: 1.1157
Epoch [4/20], Step [201/391], Loss: 1.0633
Epoch [4/20], Step [301/391], Loss: 0.9886
Epoch 4 finished. Time: 15.81s, Average Loss: 1.1498
Epoch [5/20], Step [1/391], Loss: 1.0651
Epoch [5/20], Step [101/391], Loss: 1.0837
Epoch [5/20], Step [201/

In [6]:
# # 加载完整模型
# model = torch.load("resnet18_cifar10_complete.pth")
# model = model.to(dml)
# print("Complete model loaded from resnet18_cifar10_complete.pth")

