In [4]:
import torch
import torchvision
import torchvision.transforms as transforms

# Preprocessing pipeline and data loading: each image is converted to a tensor
# and then normalized with mean 0.5 and std 0.5 on its single channel.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Training split, stored under ./data (download=True).
trainset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=64,
    shuffle=True,
    num_workers=2,
)

# Test split; download=False, so the files must already be present in ./data.
testset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    download=False,
    transform=transform,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=64,
    shuffle=False,
    num_workers=2,
)

# Define the network model
class Net(torch.nn.Module):
    """Small convolutional classifier.

    Two convolution + max-pool stages are followed by three linear layers.
    The first convolution takes 1 input channel and the last linear layer
    produces 10 values per row. ``forward`` reshapes the pooled feature maps
    into rows of ``16 * 4 * 4`` values before the linear layers.
    """

    def __init__(self):
        super().__init__()
        nn = torch.nn
        # Convolutional feature extractor; one pooling module is shared by
        # both stages.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # Fully connected head.
        self.fc1 = nn.Linear(in_features=16 * 4 * 4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Run ``x`` through the network and return the output of ``fc3``."""
        relu = torch.nn.functional.relu
        # conv -> ReLU -> pool, twice.
        for conv in (self.conv1, self.conv2):
            x = self.pool(relu(conv(x)))
        # Collapse every sample's feature maps into one row of 16 * 4 * 4 values.
        x = x.view(-1, 16 * 4 * 4)
        # Two hidden linear layers with ReLU; the final layer has no activation.
        for hidden in (self.fc1, self.fc2):
            x = relu(hidden(x))
        return self.fc3(x)

import time

net = Net()

# Device selection is manual: keep active the one line below that matches
# the platform this is being run on.
# CPU
device = torch.device("cpu")
# CUDA
# device = torch.device("cuda:0")
# MPS
# device = torch.device("mps")

net.to(device)

# Loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# Train the network
start_time = time.time()  # wall-clock time at the start of training

for epoch in range(10):  # 10 passes over the training loader
    running_loss = 0.0
    for i, data in enumerate(trainloader):
        inputs = data[0].to(device)
        labels = data[1].to(device)

        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        # Report the mean loss of the last 100 batches, then reset the sum.
        if (i + 1) % 100 == 0:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0

end_time = time.time()  # wall-clock time at the end of training
training_time = end_time - start_time  # elapsed training time in seconds

print('Training took %.2f seconds.' % training_time)

print('Finished Training')

# Evaluate the network on the test loader
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images = data[0].to(device)
        labels = data[1].to(device)
        outputs = net(images)
        # The index of the largest output in each row is the predicted class.
        _, predicted = torch.max(outputs.data, dim=1)
        total += labels.shape[0]
        correct += predicted.eq(labels).sum().item()

print(
    f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')


[1,   100] loss: 2.301
[1,   200] loss: 2.294
[1,   300] loss: 2.281
[1,   400] loss: 2.260
[1,   500] loss: 2.210
[1,   600] loss: 2.042
[1,   700] loss: 1.407
[1,   800] loss: 0.772
[1,   900] loss: 0.552
[2,   100] loss: 0.434
[2,   200] loss: 0.397
[2,   300] loss: 0.357
[2,   400] loss: 0.317
[2,   500] loss: 0.305
[2,   600] loss: 0.277
[2,   700] loss: 0.276
[2,   800] loss: 0.245
[2,   900] loss: 0.238
[3,   100] loss: 0.222
[3,   200] loss: 0.214
[3,   300] loss: 0.199
[3,   400] loss: 0.182
[3,   500] loss: 0.168
[3,   600] loss: 0.172
[3,   700] loss: 0.178
[3,   800] loss: 0.163
[3,   900] loss: 0.152
[4,   100] loss: 0.150
[4,   200] loss: 0.153
[4,   300] loss: 0.146
[4,   400] loss: 0.140
[4,   500] loss: 0.124
[4,   600] loss: 0.128
[4,   700] loss: 0.114
[4,   800] loss: 0.128
[4,   900] loss: 0.119
[5,   100] loss: 0.105
[5,   200] loss: 0.099
[5,   300] loss: 0.103
[5,   400] loss: 0.117
[5,   500] loss: 0.113
[5,   600] loss: 0.105
[5,   700] loss: 0.106
[5,   800] 

In [5]:
import time

# Device selection is manual: keep active the one line below that matches
# the platform this is being run on.
# CPU
# device = torch.device("cpu")
# CUDA
device = torch.device("cuda:0")
# MPS
# device = torch.device("mps")

net.to(device)

# Loss function and optimizer
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# Train the network
start_time = time.time()  # wall-clock time at the start of training

for epoch in range(10):  # 10 passes over the training loader
    running_loss = 0.0
    for i, data in enumerate(trainloader):
        inputs = data[0].to(device)
        labels = data[1].to(device)

        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        # Report the mean loss of the last 100 batches, then reset the sum.
        if (i + 1) % 100 == 0:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0

end_time = time.time()  # wall-clock time at the end of training
training_time = end_time - start_time  # elapsed training time in seconds

print('Training took %.2f seconds.' % training_time)

print('Finished Training')

# Evaluate the network on the test loader
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images = data[0].to(device)
        labels = data[1].to(device)
        outputs = net(images)
        # The index of the largest output in each row is the predicted class.
        _, predicted = torch.max(outputs.data, dim=1)
        total += labels.shape[0]
        correct += predicted.eq(labels).sum().item()

print(
    f'Accuracy of the network on the 10000 test images: {100 * correct // total} %')

[1,   100] loss: 0.062
[1,   200] loss: 0.055
[1,   300] loss: 0.055
[1,   400] loss: 0.055
[1,   500] loss: 0.053
[1,   600] loss: 0.056
[1,   700] loss: 0.055
[1,   800] loss: 0.053
[1,   900] loss: 0.054
[2,   100] loss: 0.063
[2,   200] loss: 0.050
[2,   300] loss: 0.052
[2,   400] loss: 0.051
[2,   500] loss: 0.049
[2,   600] loss: 0.046
[2,   700] loss: 0.049
[2,   800] loss: 0.055
[2,   900] loss: 0.055
[3,   100] loss: 0.047
[3,   200] loss: 0.052
[3,   300] loss: 0.050
[3,   400] loss: 0.051
[3,   500] loss: 0.048
[3,   600] loss: 0.046
[3,   700] loss: 0.051
[3,   800] loss: 0.051
[3,   900] loss: 0.044
[4,   100] loss: 0.045
[4,   200] loss: 0.039
[4,   300] loss: 0.042
[4,   400] loss: 0.041
[4,   500] loss: 0.046
[4,   600] loss: 0.042
[4,   700] loss: 0.056
[4,   800] loss: 0.044
[4,   900] loss: 0.046
[5,   100] loss: 0.040
[5,   200] loss: 0.046
[5,   300] loss: 0.040
[5,   400] loss: 0.037
[5,   500] loss: 0.045
[5,   600] loss: 0.047
[5,   700] loss: 0.043
[5,   800] 

In [4]:
import torch
import torchvision
import torchvision.models as models
import time
import numpy as np

def test_on_device(model, dump_inputs, warn_up, loops, device_type):
    if device_type == 'cuda':
        assert torch.cuda.is_available()
    device = torch.device(device_type)

    # model = models.alexnet.alexnet(pretrained=False).to(device)
    model.to(device)
    model.eval()
    dump_inputs = dump_inputs.to(device)

    with torch.no_grad():
        executions = []
        for i in range(warn_up + loops):
            if device_type == 'cuda':
                torch.cuda.synchronize()
            start = time.time()
            _ = model(dump_inputs)
            if device_type == 'cuda':
                torch.cuda.synchronize() # CUDA sync
            end = time.time()
            executions.append((end-start)*1000) # ms
    # print(f'Avg time:{np.mean(executions)} ms')
    return np.mean(executions[warn_up:])


if __name__ == "__main__":
    # Models to benchmark, keyed by display name. Each one is built by
    # calling its constructor without arguments.
    model_list = dict([
        ('AlexNet', models.alexnet()),
        ('ResNet-50', models.resnet50()),
        ('ResNet-18', models.resnet18()),
        ('ResNet-101', models.resnet101()),
        ('MobileNet-v2', models.mobilenet_v2()),
        ('SqueezeNet1-1', models.squeezenet1_1()),
    ])

    batch_size = 1
    for name, model in model_list.items():
        # Banner: the model name framed by ten '=' on each side.
        print('{0}{1}{0}'.format('=' * 10, name))
        # A new random (batch_size, 3, 224, 224) input is drawn per model.
        avg_time = test_on_device(
            model=model,
            dump_inputs=torch.rand(batch_size, 3, 224, 224),
            warn_up=3,
            loops=10,
            device_type='cuda',
        )
        print(f'Avg time:{avg_time} ms')

Avg time:6.475734710693359 ms
Avg time:16.47512912750244 ms
Avg time:7.0815324783325195 ms
Avg time:26.921486854553223 ms
Avg time:8.457207679748535 ms
Avg time:4.254484176635742 ms


In [5]:
import torch
import torchvision
import torchvision.models as models
import time
import numpy as np

def test_on_device(model, dump_inputs, warn_up, loops, device_type):
    if device_type == 'cuda':
        assert torch.cuda.is_available()
    device = torch.device(device_type)

    # model = models.alexnet.alexnet(pretrained=False).to(device)
    model.to(device)
    model.eval()
    dump_inputs = dump_inputs.to(device)

    with torch.no_grad():
        executions = []
        for i in range(warn_up + loops):
            if device_type == 'cuda':
                torch.cuda.synchronize()
            start = time.time()
            _ = model(dump_inputs)
            if device_type == 'cuda':
                torch.cuda.synchronize() # CUDA sync
            end = time.time()
            executions.append((end-start)*1000) # ms
    # print(f'Avg time:{np.mean(executions)} ms')
    return np.mean(executions[warn_up:])


if __name__ == "__main__":
    # Models to benchmark, keyed by display name. Each one is built by
    # calling its constructor without arguments.
    model_list = dict([
        ('AlexNet', models.alexnet()),
        ('ResNet-50', models.resnet50()),
        ('ResNet-18', models.resnet18()),
        ('ResNet-101', models.resnet101()),
        ('MobileNet-v2', models.mobilenet_v2()),
        ('SqueezeNet1-1', models.squeezenet1_1()),
    ])

    batch_size = 1
    for name, model in model_list.items():
        # Banner: the model name framed by ten '=' on each side.
        print('{0}{1}{0}'.format('=' * 10, name))
        # A new random (batch_size, 3, 224, 224) input is drawn per model.
        avg_time = test_on_device(
            model=model,
            dump_inputs=torch.rand(batch_size, 3, 224, 224),
            warn_up=3,
            loops=10,
            device_type='cpu',
        )
        print(f'Avg time:{avg_time} ms')

Avg time:30.80286979675293 ms
Avg time:122.4935531616211 ms
Avg time:61.290574073791504 ms
Avg time:196.42741680145264 ms
Avg time:34.5745325088501 ms
Avg time:19.842529296875 ms
