# benchmark

> A module for benchmarking PyTorch models along four axes: size, speed, compute, and energy.

In [None]:
#| default_exp benchmark

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()

In [None]:
import torch
import time
from torchprofile import profile_macs
from codecarbon import OfflineEmissionsTracker
import numpy as np
import os

## Size

In [None]:
#| export
def get_model_size(model, temp_path="temp_model.pth"):
    """Return the on-disk size, in bytes, of the model's serialized `state_dict`.

    The state dict is written to `temp_path` with `torch.save`, measured with
    `os.path.getsize`, and the file is removed again before returning.

    Args:
        model: Object exposing `state_dict()` (e.g. a `torch.nn.Module`).
        temp_path: Scratch file used for the measurement. An existing file at
            this path is overwritten and then deleted.

    Returns:
        Size of the serialized state dict in bytes.
    """
    # Build the state dict before touching the filesystem so that a failure
    # here cannot delete an unrelated file that already lives at temp_path.
    state = model.state_dict()
    try:
        torch.save(state, temp_path)
        return os.path.getsize(temp_path)
    finally:
        # Clean up even when saving or stat-ing fails part-way, so a failed
        # call does not leave a stray (possibly partial) checkpoint behind.
        if os.path.exists(temp_path):
            os.remove(temp_path)

In [None]:
#| export
def get_num_parameters(model):
    """Count the scalar elements of every trainable parameter of `model`.

    Parameters whose `requires_grad` flag is false are not counted.
    """
    total = 0
    for param in model.parameters():
        if not param.requires_grad:
            continue
        total += param.numel()
    return total

## Speed

In [None]:
#| export
@torch.inference_mode()
def evaluate_gpu_speed(model, dummy_input, warmup_rounds=50, test_rounds=100):
    """Measure forward-pass latency and throughput of `model` on the CUDA device.

    The model is switched to eval mode and moved to `cuda` (and stays there
    after the call). Each timed round is bracketed by a pair of CUDA events.

    Args:
        model: Module to benchmark.
        dummy_input: Tensor fed to `model`; its size along dimension 0 is used
            as the number of inferences per forward pass.
        warmup_rounds: Untimed forward passes run before measuring.
        test_rounds: Timed forward passes; must be positive.

    Returns:
        Tuple `(mean_latency, std_latency, throughput)` with latencies in
        milliseconds and throughput in inferences per second.

    Raises:
        ValueError: If `test_rounds` is not positive (the statistics would
            otherwise be NaN, computed over an empty sample).
    """
    if test_rounds <= 0:
        raise ValueError(f"test_rounds must be a positive integer, got {test_rounds}")

    device = torch.device("cuda")
    model.eval()
    model.to(device)
    dummy_input = dummy_input.to(device)

    starter = torch.cuda.Event(enable_timing=True)
    ender = torch.cuda.Event(enable_timing=True)

    # Warm up GPU, then drain the queue so no warm-up work is still in flight
    # when the timed section begins.
    for _ in range(warmup_rounds):
        model(dummy_input)
    torch.cuda.synchronize()

    # Measure latency
    latencies = []
    for _ in range(test_rounds):
        starter.record()
        model(dummy_input)
        ender.record()
        # Kernel launches are asynchronous: wait for both events to complete
        # before reading the elapsed time between them.
        torch.cuda.synchronize()
        latencies.append(starter.elapsed_time(ender))  # time in milliseconds

    latencies = np.array(latencies)
    mean_latency = np.mean(latencies)
    std_latency = np.std(latencies)

    # Latency is in ms, hence the factor 1000 to get inferences per second.
    throughput = dummy_input.size(0) * 1000 / mean_latency

    return mean_latency, std_latency, throughput

In [None]:
#| export
@torch.inference_mode()
def evaluate_cpu_speed(model, dummy_input, warmup_rounds=50, test_rounds=100):
    device = torch.device("cpu")
    model.eval()
    model.to(device)
    dummy_input = dummy_input.to(device)
    
    # Warm up CPU
    for _ in range(warmup_rounds):
        _ = model(dummy_input)
    
    # Measure Latency
    latencies = []
    for _ in range(test_rounds):
        start_time = time.perf_counter()
        _ = model(dummy_input)
        end_time = time.perf_counter()
        latencies.append(end_time - start_time)
    
    latencies = np.array(latencies) * 1000  # Convert to milliseconds
    mean_latency = np.mean(latencies)
    std_latency = np.std(latencies)

    # Measure Throughput
    throughput = dummy_input.size(0) * 1000 / mean_latency  # Inferences per second

    return mean_latency, std_latency, throughput

## Compute

In [None]:
#| export
@torch.inference_mode()
def get_model_macs(model, inputs) -> int:
    """Return the multiply-accumulate count `profile_macs` reports for `model` on `inputs`."""
    mac_count = profile_macs(model, inputs)
    return mac_count

In [None]:
#| export
@torch.inference_mode()
def evaluate_gpu_memory_usage(model, dummy_input, warmup_rounds=10, test_rounds=100):
    """Measure CUDA memory allocated while running `model` on `dummy_input`.

    The model is switched to eval mode and moved to `cuda` (and stays there
    after the call).

    Args:
        model: Module to benchmark.
        dummy_input: Tensor fed to `model`.
        warmup_rounds: Forward passes run before measuring; excluded from the
            peak statistic.
        test_rounds: Measured forward passes; must be positive.

    Returns:
        Tuple `(average_memory_usage, peak_memory_usage)` in bytes: the mean of
        `torch.cuda.memory_allocated` sampled after each measured pass, and
        `torch.cuda.max_memory_allocated` over all measured passes.

    Raises:
        ValueError: If `test_rounds` is not positive (the average would
            otherwise be NaN, computed over an empty sample).
    """
    if test_rounds <= 0:
        raise ValueError(f"test_rounds must be a positive integer, got {test_rounds}")

    device = torch.device("cuda")
    model.eval()
    model.to(device)
    dummy_input = dummy_input.to(device)

    # Warm up GPU
    for _ in range(warmup_rounds):
        model(dummy_input)
    torch.cuda.synchronize()

    # Reset the peak counter once, after warm-up, so the reported peak spans
    # every measured round rather than only the final one.
    torch.cuda.reset_peak_memory_stats(device)

    # Measure memory usage
    memory_usages = []
    for _ in range(test_rounds):
        # Keep the output referenced while sampling so it is counted as allocated.
        _ = model(dummy_input)
        torch.cuda.synchronize()
        memory_usages.append(torch.cuda.memory_allocated(device))

    average_memory_usage = np.mean(np.array(memory_usages))
    peak_memory_usage = torch.cuda.max_memory_allocated(device)

    return average_memory_usage, peak_memory_usage

## Energy

In [None]:
#| export
@torch.inference_mode()
def evaluate_emissions(model, dummy_input, warmup_rounds=50, test_rounds=100,
                       country_iso_code="USA"):
    """Estimate carbon emissions and energy use per forward pass on the CUDA device.

    The model is switched to eval mode and moved to `cuda` (and stays there
    after the call). A CodeCarbon `OfflineEmissionsTracker` is active for the
    duration of the measured forward passes only.

    Args:
        model: Module to benchmark.
        dummy_input: Tensor fed to `model`.
        warmup_rounds: Untracked forward passes run before measuring.
        test_rounds: Tracked forward passes; must be positive.
        country_iso_code: Country code forwarded to `OfflineEmissionsTracker`
            to select the carbon intensity used for the estimate.

    Returns:
        Tuple `(average_emissions_per_inference, average_energy_per_inference)`:
        the tracker's `final_emissions` and `final_emissions_data.energy_consumed`
        divided by `test_rounds`, in CodeCarbon's native units (kg CO2eq and
        kWh per its documentation).

    Raises:
        ValueError: If `test_rounds` is not positive.
    """
    if test_rounds <= 0:
        raise ValueError(f"test_rounds must be a positive integer, got {test_rounds}")

    device = torch.device("cuda")
    model.eval()
    model.to(device)
    dummy_input = dummy_input.to(device)

    # Warm up GPU, then drain the queue so warm-up work is not attributed to
    # the tracked window.
    for _ in range(warmup_rounds):
        model(dummy_input)
    torch.cuda.synchronize()

    # Measure emissions and energy
    tracker = OfflineEmissionsTracker(country_iso_code=country_iso_code)
    tracker.start()
    try:
        for _ in range(test_rounds):
            model(dummy_input)
        # Kernel launches are asynchronous: wait for the GPU to finish so the
        # tracked window covers all of the work that was queued.
        torch.cuda.synchronize()
    finally:
        # Always stop the tracker, even if a forward pass raises.
        tracker.stop()

    total_emissions = tracker.final_emissions
    total_energy_consumed = tracker.final_emissions_data.energy_consumed

    # Calculate average emissions and energy consumption per inference
    average_emissions_per_inference = total_emissions / test_rounds
    average_energy_per_inference = total_energy_consumed / test_rounds

    return average_emissions_per_inference, average_energy_per_inference

In [None]:
#| export
def format_number(num):
    """Format `num` with two decimals and a K/M/B suffix chosen by its magnitude.

    Values whose absolute value is at least 1e9, 1e6 or 1e3 are scaled and
    suffixed with " B", " M" or " K" respectively; smaller values are printed
    unscaled with no suffix. The sign is preserved, so -2_500_000 becomes
    "-2.50 M".
    """
    # Compare on the absolute value so negative numbers are scaled as well.
    magnitude = abs(num)
    for threshold, suffix in ((1e9, "B"), (1e6, "M"), (1e3, "K")):
        if magnitude >= threshold:
            return f"{num / threshold:.2f} {suffix}"
    return f"{num:.2f}"

In [None]:
#| export
@torch.inference_mode()
def benchmark(model, dummy_input):
    """Run every benchmark in this module on `model`, print a report, and return the metrics.

    Runs, in order: disk size and parameter count, GPU speed, CPU speed, MACs,
    GPU memory usage, and emissions. The speed, memory and emissions steps move
    `model` between devices, so a CUDA device is required.

    Args:
        model: Module to benchmark.
        dummy_input: Tensor fed to `model` by every measurement.

    Returns:
        Dict of raw (unformatted) metrics: `disk_size`, `num_parameters`,
        `gpu_latency`, `gpu_std_latency`, `gpu_throughput`, `cpu_latency`,
        `cpu_std_latency`, `cpu_throughput`, `macs`, `avg_gpu_memory`,
        `peak_gpu_memory`, `avg_emissions`, `avg_energy`.
    """
    # Model Size
    disk_size = get_model_size(model)
    num_parameters = get_num_parameters(model)

    # GPU Speed
    gpu_latency, gpu_std_latency, gpu_throughput = evaluate_gpu_speed(model, dummy_input)

    # CPU Speed
    cpu_latency, cpu_std_latency, cpu_throughput = evaluate_cpu_speed(model, dummy_input)

    # Model MACs
    macs = get_model_macs(model, dummy_input)

    # GPU Memory Usage
    avg_gpu_memory, peak_gpu_memory = evaluate_gpu_memory_usage(model, dummy_input)

    # Emissions
    avg_emissions, avg_energy = evaluate_emissions(model, dummy_input)

    # Print results
    print(f"Model Size: {disk_size / 1e6:.2f} MB (disk), {format_number(num_parameters)} parameters")
    print(f"GPU Latency: {gpu_latency:.3f} ms (± {gpu_std_latency:.3f} ms)")
    print(f"GPU Throughput: {gpu_throughput:.2f} inferences/sec")
    print(f"CPU Latency: {cpu_latency:.3f} ms (± {cpu_std_latency:.3f} ms)")
    print(f"CPU Throughput: {cpu_throughput:.2f} inferences/sec")
    print(f"Model MACs: {format_number(macs)}")
    print(f"Average GPU Memory Usage: {avg_gpu_memory / 1e6:.2f} MB")
    print(f"Peak GPU Memory Usage: {peak_gpu_memory / 1e6:.2f} MB")
    # The two values below are scaled by 1e3 to match the printed unit labels.
    print(f"Average Carbon Emissions per Inference: {avg_emissions*1e3:.6f} gCO2e")
    print(f"Average Energy Consumption per Inference: {avg_energy*1e3:.6f} Wh")

    return {
        'disk_size': disk_size,
        'num_parameters': num_parameters,
        'gpu_latency': gpu_latency,
        # The standard deviations are reported above, so expose them to callers too.
        'gpu_std_latency': gpu_std_latency,
        'gpu_throughput': gpu_throughput,
        'cpu_latency': cpu_latency,
        'cpu_std_latency': cpu_std_latency,
        'cpu_throughput': cpu_throughput,
        'macs': macs,
        'avg_gpu_memory': avg_gpu_memory,
        'peak_gpu_memory': peak_gpu_memory,
        'avg_emissions': avg_emissions,
        'avg_energy': avg_energy,
    }

In [None]:
from torchvision.models import resnet18

# Example model: a ResNet-18 built with default arguments (no weights argument is passed).
model = resnet18()
# Example input: one batch of 64 random tensors of shape (3, 224, 224).
dummy_input = torch.randn(64, 3, 224, 224)

In [None]:
# Run the full suite; the report is printed and the returned metrics dict is shown as the cell result.
benchmark(model, dummy_input)

[codecarbon INFO @ 13:54:16] offline tracker init
[codecarbon INFO @ 13:54:16] [setup] RAM Tracking...
[codecarbon INFO @ 13:54:16] [setup] GPU Tracking...
[codecarbon INFO @ 13:54:16] Tracking Nvidia GPU via pynvml
[codecarbon INFO @ 13:54:16] [setup] CPU Tracking...
[codecarbon INFO @ 13:54:17] CPU Model on constant consumption mode: 12th Gen Intel(R) Core(TM) i9-12900K
[codecarbon INFO @ 13:54:17] >>> Tracker's metadata:
[codecarbon INFO @ 13:54:17]   Platform system: Linux-5.15.0-113-generic-x86_64-with-glibc2.31
[codecarbon INFO @ 13:54:17]   Python version: 3.9.0
[codecarbon INFO @ 13:54:17]   CodeCarbon version: 2.3.4
[codecarbon INFO @ 13:54:17]   Available RAM : 125.578 GB
[codecarbon INFO @ 13:54:17]   CPU count: 24
[codecarbon INFO @ 13:54:17]   CPU model: 12th Gen Intel(R) Core(TM) i9-12900K
[codecarbon INFO @ 13:54:17]   GPU count: 1
[codecarbon INFO @ 13:54:17]   GPU model: 1 x NVIDIA GeForce RTX 3090
[codecarbon INFO @ 13:54:19] Energy consumed for RAM : 0.000016 kWh. RA

Model Size: 46.84 MB (disk), 11.69 M parameters
GPU Latency: 13.116 ms (± 0.038 ms)
GPU Throughput: 4879.53 inferences/sec
CPU Latency: 502.583 ms (± 10.011 ms)
CPU Throughput: 127.34 inferences/sec
Model MACs: 116.26 B
Average GPU Memory Usage: 227.64 MB
Peak GPU Memory Usage: 638.43 MB
Average Carbon Emissions per Inference: 0.000528 gCO2e
Average Energy Consumption per Inference: 0.001392 Wh


{'disk_size': 46835512,
 'num_parameters': 11689512,
 'gpu_latency': 13.116016960144043,
 'gpu_throughput': 4879.530134375272,
 'cpu_latency': 502.5826620403677,
 'cpu_throughput': 127.34223608147367,
 'macs': 116259684352,
 'avg_gpu_memory': 227642368.0,
 'peak_gpu_memory': 638428160,
 'avg_emissions': 5.278229550241391e-07,
 'avg_energy': 1.391717963993406e-06}