In [1]:
import torch
import torch.nn as nn
import time

In [2]:
# Define the test models
class ConvNet3x3(nn.Module):
    """Single-layer network consisting of one 3x3 convolution with padding 1."""

    def __init__(self, in_channels, out_channels):
        """Build the convolution mapping ``in_channels`` to ``out_channels``."""
        super().__init__()
        kernel = 3
        self.conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel,
            padding=kernel // 2,  # 3 // 2 == 1
        )

    def forward(self, x):
        """Apply the convolution to ``x`` and return the result."""
        features = self.conv(x)
        return features

class ConvNet5x5(nn.Module):
    """Single-layer network consisting of one 5x5 convolution with padding 2."""

    def __init__(self, in_channels, out_channels):
        """Build the convolution mapping ``in_channels`` to ``out_channels``."""
        super().__init__()
        kernel = 5
        self.conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel,
            padding=kernel // 2,  # 5 // 2 == 2
        )

    def forward(self, x):
        """Apply the convolution to ``x`` and return the result."""
        features = self.conv(x)
        return features

In [3]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Create the input batch directly on the target device; this avoids allocating
# the whole tensor on the CPU first and then copying it across.
input_data = torch.randn(100, 3, 224, 224, device=device)  # (batch_size, in_channels, height, width)

In [4]:
# Instantiate both models (3 input channels, 64 output channels) and move them
# to the selected device.
model_3x3 = ConvNet3x3(3, 64)
model_3x3 = model_3x3.to(device)
model_5x5 = ConvNet5x5(3, 64)
model_5x5 = model_5x5.to(device)

In [5]:
def measure_time(model, input_data, iterations=10, warmup=10):
    """Return the mean wall-clock time of one ``model(input_data)`` call, in seconds.

    The model is first called ``warmup`` times without timing, then called
    ``iterations`` times inside a single timed region. When CUDA is available,
    the device is synchronized before the timer starts and again before it
    stops, so asynchronously launched GPU work is fully included in the
    measurement. Every call runs under ``torch.no_grad()`` so that no autograd
    graph is recorded while benchmarking.

    Args:
        model: Callable invoked as ``model(input_data)``.
        input_data: The argument passed to every call of ``model``.
        iterations: Number of timed calls; must be at least 1.
        warmup: Number of untimed calls made beforehand. Values <= 0 skip the
            warm-up phase.

    Returns:
        float: Total elapsed time of the timed region divided by ``iterations``.

    Raises:
        ValueError: If ``iterations`` is less than 1.
    """
    if iterations < 1:
        # Guards against division by zero and against a negative "average".
        raise ValueError(f"iterations must be >= 1, got {iterations}")

    use_cuda = torch.cuda.is_available()

    with torch.no_grad():
        # Warm-up: absorbs one-time costs so they do not land in the timed region.
        for _ in range(warmup):
            model(input_data)
        if use_cuda:
            torch.cuda.synchronize()

        # Timed region. perf_counter is monotonic and high-resolution, unlike
        # time.time(), which can jump when the system clock is adjusted.
        start_time = time.perf_counter()
        for _ in range(iterations):
            model(input_data)
        if use_cuda:
            # Wait for queued GPU work to finish before stopping the clock.
            torch.cuda.synchronize()
        total_time = time.perf_counter() - start_time

    return total_time / iterations

In [6]:
# Benchmark the forward pass of the 3x3 model first, then the 5x5 model,
# both on the same input batch.
time_3x3 = measure_time(model_3x3, input_data)
time_5x5 = measure_time(model_5x5, input_data)

# Report the mean per-call forward time of each model, one line per model.
print(
    f"3x3 卷积核的平均前向传播时间: {time_3x3:.6f} 秒",
    f"5x5 卷积核的平均前向传播时间: {time_5x5:.6f} 秒",
    sep="\n",
)

3x3 卷积核的平均前向传播时间: 0.141407 秒
5x5 卷积核的平均前向传播时间: 0.171332 秒
