In [None]:
import csv
import time
import torch
import matplotlib.pyplot as plt

from tqdm import tqdm
from pathlib import Path
from cartoon_gan_origin.models.generator import Generator  


@torch.no_grad()
def benchmark_model(model, device, batch_sizes, image_size, warmup = 0, iters = 50):
    """Measure forward-pass latency and throughput of ``model`` on random input.

    For every batch size a random tensor of shape
    ``(batch_size, 3, image_size[0], image_size[1])`` is created on ``device``.
    ``warmup`` untimed forward passes are executed first, then ``iters``
    forward passes are timed one by one and averaged.

    Args:
        model: Module to benchmark. It is moved to ``device`` and put into
            eval mode in place.
        device: ``torch.device`` on which the model and the input live.
        batch_sizes: Iterable of batch sizes to benchmark.
        image_size: Two-element sequence giving the last two dimensions of
            the input tensor.
        warmup: Number of untimed forward passes per batch size.
        iters: Number of timed forward passes per batch size (must be >= 1).

    Returns:
        A list with one dict per batch size, holding the keys ``device``,
        ``batch_size``, ``avg_time_sec`` and ``throughput_fps``.

    Raises:
        ValueError: If ``iters`` is smaller than 1.
    """
    if iters < 1:
        # An empty timing list would otherwise surface as a ZeroDivisionError.
        raise ValueError(f"iters must be >= 1, got {iters}")

    results = []
    on_cuda = device.type == "cuda"

    def _wait_for_device():
        # CUDA kernels are launched asynchronously; without a synchronize the
        # host clock would only measure the launch, not the computation.
        if on_cuda:
            torch.cuda.synchronize(device)

    model.to(device)
    model.eval()

    if on_cuda:
        torch.cuda.empty_cache()
        # Note: this is a process-wide setting and stays enabled after the call.
        torch.backends.cudnn.benchmark = True

    for batch_size in batch_sizes:
        dummy_input = torch.randn(batch_size, 3, image_size[0], image_size[1], device=device)

        # WARMUP for compile
        for _ in range(warmup):
            model(dummy_input)
        _wait_for_device()

        # TIMING
        times = []
        for _ in tqdm(range(iters)):
            # perf_counter is monotonic and high-resolution, unlike time.time().
            start = time.perf_counter()
            model(dummy_input)
            _wait_for_device()
            times.append(time.perf_counter() - start)

        avg_time = sum(times) / len(times)
        throughput = batch_size / avg_time
        results.append({
            "device": device.type,
            "batch_size": batch_size,
            "avg_time_sec": avg_time,
            "throughput_fps": throughput,
        })
        print(f"{device.type} batch={batch_size} avg_time={avg_time*1000}ms throughput={throughput}img/s")

    return results

In [None]:
# Benchmark configuration: batch sizes to sweep and the input size passed to
# benchmark_model as image_size.
BATCH_SIZE = [1, 4, 8, 16, 32, 64]
IMG_SIZE = (128, 128)

# Baseline generator: the architecture is built in code and only the state dict
# is read from disk, so the safe tensor-only loader is sufficient here.
WEIGHTS = 'cartoon_gan_origin/checkpoints/trained_netG.pth'
model_orig = Generator()
model_orig.load_state_dict(torch.load(WEIGHTS, map_location='cpu', weights_only=True))

# SECURITY NOTE: the checkpoints below are loaded with weights_only=False, i.e.
# they are unpickled as whole objects. Unpickling can execute arbitrary code,
# so only load files from a trusted source.
WEIGHTS1 = 'cartoon_gan_origin/checkpoints/prun_distilation_G.pth'
model1 = torch.load(WEIGHTS1, map_location='cpu', weights_only=False)

# FIXME(review): WEIGHTS2 is the same path as WEIGHTS1, so model1 and model2 are
# two copies of the same checkpoint. This looks like a copy-paste slip; confirm
# the intended file name.
WEIGHTS2 = 'cartoon_gan_origin/checkpoints/prun_distilation_G.pth'
model2 = torch.load(WEIGHTS2, map_location='cpu', weights_only=False)

WEIGHTS3 = 'cartoon_gan_origin/checkpoints/tune_trained_netG_20prun.pth'
model3 = torch.load(WEIGHTS3, map_location='cpu', weights_only=False)

# Accumulators read by the CSV-export cell.
cpu_stats, gpu_stats = [], []

In [None]:
# Total number of parameter elements per model, displayed as a tuple in the
# order (model_orig, model1, model2, model3).
tuple(
    sum(param.numel() for param in net.parameters())
    for net in (model_orig, model1, model2, model3)
)

In [None]:
# CPU TEST
cpu_device = torch.device("cpu")


def _bench_on_cpu(net):
    """Run the 50-iteration benchmark sweep for one model on the CPU device."""
    return benchmark_model(net, cpu_device, BATCH_SIZE, IMG_SIZE, iters=50)


cpu_stats_orig = _bench_on_cpu(model_orig)
cpu_stats1 = _bench_on_cpu(model1)
cpu_stats2 = _bench_on_cpu(model2)
print('cpu test finish')

In [None]:
# Same CPU sweep for model3, kept in its own cell so it can be run separately.
cpu_stats3 = benchmark_model(
    model=model3,
    device=cpu_device,
    batch_sizes=BATCH_SIZE,
    image_size=IMG_SIZE,
    iters=50,
)

In [None]:
# Throughput-vs-batch-size comparison of the CPU benchmark runs.
plot_dir = Path('test_results')
# savefig does not create missing directories, so make sure the target exists.
plot_dir.mkdir(parents=True, exist_ok=True)

cpu_series = [
    ('orig_model', cpu_stats_orig),
    ('prune10_model', cpu_stats1),
    ('prune20_model', cpu_stats2),
    # ('prune30_model', cpu_stats3),
]

fig, ax = plt.subplots(figsize=(10, 5))
ax.set_title(f'CPU Intel i5-12600KF CartoonGAN img{IMG_SIZE}')

for series_label, series_stats in cpu_series:
    # Take x from the records themselves so the curve stays correct even if
    # BATCH_SIZE was edited after the benchmark ran.
    ax.plot(
        [d["batch_size"] for d in series_stats],
        [d["throughput_fps"] for d in series_stats],
        marker="o",
        label=series_label,
    )
ax.grid(linestyle="--", alpha=0.3)

ax.set_xlabel('Batch size')
ax.set_ylabel('FPS images/s')
ax.legend()
fig.savefig(plot_dir / f'{IMG_SIZE}_cpu.png')

In [None]:
# Export every benchmark record to a single CSV file.
csv_dir = Path("test_results")
# open(..., "w") does not create missing directories.
csv_dir.mkdir(parents=True, exist_ok=True)

# `tag` is not assigned in any visible cell; reuse it when an earlier cell
# defined it, otherwise derive it from the benchmarked image size.
tag = globals().get("tag") or f"{IMG_SIZE[0]}x{IMG_SIZE[1]}"

# The benchmark cells store their results per model, not in cpu_stats /
# gpu_stats, so gather them here and label each row with its model variable.
runs_by_model = {
    "model_orig": cpu_stats_orig,
    "model1": cpu_stats1,
    "model2": cpu_stats2,
    "model3": cpu_stats3,
}

# `+` builds a new list, so re-running this cell never grows cpu_stats/gpu_stats.
all_stats = cpu_stats + gpu_stats
for model_name, rows in runs_by_model.items():
    all_stats.extend({"model": model_name, **row} for row in rows)

with open(csv_dir / f"benchmark_results_{tag}.csv", "w", newline="") as f:
    writer = csv.DictWriter(
        f,
        fieldnames=["model", "device", "batch_size", "avg_time_sec", "throughput_fps"],
        restval="",  # rows coming from cpu_stats/gpu_stats carry no model label
    )
    writer.writeheader()
    writer.writerows(all_stats)