In [1]:
import copy
import gc
import pickle
from collections import defaultdict
from pathlib import Path

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.utils.prune as prune
import torch.optim as optim
import torch_pruning as tp
import torchvision
import torchvision.transforms as transforms
import wandb

import common_utils as utils
# Show the device exposed by common_utils; the cells below map checkpoints
# and move models onto this same device.
print(utils.device)

cpu


In [2]:
# Loader settings shared by the train and test loaders below.
BATCH_SIZE = 16
NUM_WORKERS = 8

# Preprocessing applied to every FER2013 sample: resize to 96x96, convert to
# a tensor, then normalize with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.Resize((96, 96)),
    transforms.ToTensor(),
    # One-element tuples on purpose: `(0.5)` is just the float 0.5, whereas
    # Normalize documents its mean/std arguments as sequences.
    transforms.Normalize((0.5,), (0.5,)),
])

trainset = torchvision.datasets.FER2013(root='./', split="train",
                                        transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE,
                                          shuffle=True, num_workers=NUM_WORKERS)

testset = torchvision.datasets.FER2013(root='./', split="test",
                                       transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE,
                                         shuffle=False, num_workers=NUM_WORKERS)

# Use a batch size of 1 to more accurately model individual frames from a camera.
benchmark_loader = torch.utils.data.DataLoader(testset, batch_size=1,
                                               shuffle=False, num_workers=NUM_WORKERS)

In [3]:
def _compiled_copy(model):
    """Return a deep copy of `model` on which `.compile()` has been called.

    The copy keeps the original object uncompiled, so the compiled and
    uncompiled variants can be benchmarked side by side.
    """
    clone = copy.deepcopy(model)
    clone.compile()
    return clone


# The base checkpoint is consumed by load_state_dict, so it is loaded with the
# restricted unpickler explicitly instead of relying on the torch.load default,
# which differs between PyTorch releases.
base_model = utils.BaseModel()
base_model.load_state_dict(
    torch.load("base_model/checkpoint_6.pth", map_location=utils.device, weights_only=True)
)
base_model.to(utils.device)

# NOTE(security): weights_only=False unpickles a whole object and can execute
# arbitrary code from the file. Only load checkpoints produced by this project.
pruned_model = torch.load("pruned_model/checkpoint_1.pth", weights_only=False, map_location=utils.device)
pruned_model.to(utils.device)

# Just requantize the models instead of loading JIT traces/state dicts. The
# time saving would be minimal, all things considered.
quantized_base_model = utils.quantize_model(base_model, testloader)
quantized_pruned_model = utils.quantize_model(pruned_model, testloader)

# Compiled counterparts of the four models above.
compiled_base_model = _compiled_copy(base_model)
compiled_pruned_model = _compiled_copy(pruned_model)
compiled_quantized_base_model = _compiled_copy(quantized_base_model)
compiled_quantized_pruned_model = _compiled_copy(quantized_pruned_model)





In [6]:
# Every variant under test as ordered (name, model) pairs. The name is used
# for the wandb run title and as the key into benchmark_results.
models = list(
    {
        "base_model": base_model,
        "pruned_model": pruned_model,
        "quantized_base_model": quantized_base_model,
        "quantized_pruned_model": quantized_pruned_model,
        "compiled_base_model": compiled_base_model,
        "compiled_pruned_model": compiled_pruned_model,
        "compiled_quantized_base_model": compiled_quantized_base_model,
        "compiled_quantized_pruned_model": compiled_quantized_pruned_model,
    }.items()
)

In [None]:
# Evaluate and benchmark every model variant, logging one wandb run per
# variant and collecting all numbers in benchmark_results[name].
benchmark_results = defaultdict(dict)
for name, model in models:
    # Using the run as a context manager finishes it even when evaluation or
    # benchmarking raises, instead of leaving a dangling run behind.
    with wandb.init(project="hpml-final", name="{0} Benchmark".format(name)) as run:
        loss, acc = utils.test(model, testloader)
        benchmark_results[name]["test_loss"] = loss
        benchmark_results[name]["test_acc"] = acc

        # The first pass is a warm start whose result is discarded; only the
        # second pass is recorded.
        utils.benchmark_model(model, benchmark_loader, 200)
        benchmark = utils.benchmark_model(model, benchmark_loader, 200)
        benchmark_results[name]["benchmark"] = benchmark

        run.log(
            {
                "test/loss": loss,
                "test/acc": acc,
                "benchmark/images": benchmark["total_images"],
                "benchmark/total_time": benchmark["total_time"],
                "benchmark/mean_time": benchmark["mean_time"],
                "benchmark/mean_fps": benchmark["mean_fps"],
            }
        )

# Persist the results so the plotting cell can run without re-benchmarking.
with open("benchmark_results.pkl", "wb") as file:
    pickle.dump(benchmark_results, file)


In [4]:
def _save_line_plot(path, x_labels, series, title, ylabel):
    """Draw one line per entry of `series` over `x_labels` and save it to `path`.

    Args:
        path: Destination image file.
        x_labels: Category labels for the x axis.
        series: Iterable of (legend label or None, y values) pairs. A legend
            is drawn only if at least one label is given.
        title: Figure title.
        ylabel: Label of the y axis.
    """
    series = list(series)
    fig, ax = plt.subplots(figsize=(10, 6))
    for label, values in series:
        ax.plot(x_labels, values, label=label)
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.set_xlabel("Model")
    if any(label is not None for label, _ in series):
        ax.legend()
    fig.tight_layout()
    fig.savefig(path)
    # Close instead of clearing so no empty figure is left behind to be
    # rendered as the cell output.
    plt.close(fig)


# NOTE(security): pickle.load can execute arbitrary code. Only load a
# benchmark_results.pkl written by the benchmarking cell of this notebook.
with open("benchmark_results.pkl", "rb") as file:
    benchmark_results = pickle.load(file)

# The first four keys are the uncompiled variants; the last four are their
# compiled counterparts in the same order, so both halves share one x axis.
key_order = [
    "base_model",
    "quantized_base_model",
    "pruned_model",
    "quantized_pruned_model",
    "compiled_base_model",
    "compiled_quantized_base_model",
    "compiled_pruned_model",
    "compiled_quantized_pruned_model"
]

test_losses = [benchmark_results[key]["test_loss"] for key in key_order]
test_accuracies = [benchmark_results[key]["test_acc"] for key in key_order]
mean_times = [benchmark_results[key]["benchmark"]["mean_time"] for key in key_order]
mean_frames_per_second = [benchmark_results[key]["benchmark"]["mean_fps"] for key in key_order]

# savefig does not create missing directories, so make sure the target exists.
image_dir = Path("images")
image_dir.mkdir(parents=True, exist_ok=True)

x_labels = key_order[:4]

# Accuracy and loss are plotted for the uncompiled variants only.
_save_line_plot(
    image_dir / "benchmark_accuracies.png",
    x_labels,
    [(None, test_accuracies[:4])],
    title="Test Accuracies Per Model",
    ylabel="Accuracy",
)
_save_line_plot(
    image_dir / "benchmark_losses.png",
    x_labels,
    [(None, test_losses[:4])],
    title="Test Loss Per Model",
    ylabel="Loss",
)

# Timing plots compare each uncompiled variant with its compiled counterpart.
_save_line_plot(
    image_dir / "benchmark_inference_times.png",
    x_labels,
    [("Uncompiled", mean_times[:4]), ("Compiled", mean_times[4:])],
    title="Mean Inference Time Per Model",
    ylabel="Time (s)",
)
_save_line_plot(
    image_dir / "benchmark_fps.png",
    x_labels,
    [("Uncompiled", mean_frames_per_second[:4]), ("Compiled", mean_frames_per_second[4:])],
    title="Mean FPS(estimated) Per Model",
    ylabel="FPS",
)

<Figure size 1000x600 with 0 Axes>

In [7]:
# Profiling runs after the results were pickled, because the profile objects
# apparently cannot be pickled. They are only kept in memory under "profile".
for name, model in models:
    profile = utils.profile_model(model, benchmark_loader, 200)
    benchmark_results[name]["profile"] = profile

  warn("CUDA is not available, disabling CUDA profiling")
  warn("CUDA is not available, disabling CUDA profiling")
  warn("CUDA is not available, disabling CUDA profiling")
W1214 16:26:49.912000 8925 site-packages/torch/_dynamo/variables/tensor.py:913] [1/0] Graph break from `Tensor.item()`, consider setting:
W1214 16:26:49.912000 8925 site-packages/torch/_dynamo/variables/tensor.py:913] [1/0]     torch._dynamo.config.capture_scalar_outputs = True
W1214 16:26:49.912000 8925 site-packages/torch/_dynamo/variables/tensor.py:913] [1/0] or:
W1214 16:26:49.912000 8925 site-packages/torch/_dynamo/variables/tensor.py:913] [1/0]     env TORCHDYNAMO_CAPTURE_SCALAR_OUTPUTS=1
W1214 16:26:49.912000 8925 site-packages/torch/_dynamo/variables/tensor.py:913] [1/0] to include these operations in the captured graph.
W1214 16:26:49.912000 8925 site-packages/torch/_dynamo/variables/tensor.py:913] [1/0] 
W1214 16:26:49.912000 8925 site-packages/torch/_dynamo/variables/tensor.py:913] [1/0] Graph break: fro

In [15]:
# Write one text report with a section per model variant: a starred banner
# around the name, then the 30 top rows of the profile's averaged table,
# sorted by self CPU time.
banner = "*************************\n"
with open("profile_results.txt", "w") as file:
    for name, model in models:
        file.write(banner)
        file.write(f"{name}\n")
        file.write(banner)
        profile = benchmark_results[name]["profile"]
        averages = profile.key_averages()
        file.write(averages.table(sort_by="self_cpu_time_total", row_limit=30))
        file.write("\n")