In [4]:
# Model Configuration
import os  # imported here because this cell runs before the main import cell

CONV_TYPE = "fuse"  # "ori_aegnn" or "fuse"
MODEL_NAME = "evgnn_ncaltech_fuse.pth"
MODEL_PATH = "../results/TrainedModels"

# Dataset Configuration
DATASET = "ncaltech"  # "ncars" or "ncaltech"
# Dataset roots are machine-specific. Set EVGNN_NCARS_ROOT / EVGNN_NCALTECH_ROOT
# in the environment to point at a local copy; the literals below are only the
# fallback defaults, so the notebook can be shared without editing this cell.
DATASET_PATHS = {
    "ncars": os.environ.get(
        "EVGNN_NCARS_ROOT",
        r"/Users/hannes/Documents/University/Datasets/raw_ncars/Prophesee_Dataset_n_cars",
    ),
    "ncaltech": os.environ.get(
        "EVGNN_NCALTECH_ROOT",
        r"/Users/hannes/Documents/University/Datasets/raw_ncaltec",
    ),
}

# Evaluation Configuration
NUM_SAMPLES = 100  # upper bound on evaluated test samples (capped by the test-set size)
EVENTS_PER_SAMPLE = 5000  # upper bound on events streamed through the async model per sample
N_EVENTS_SAMPLE = 10000  # n_samples passed to sub_sampling() during preprocessing
OUTPUT_DIR = "../results/async_test_results"

# Graph Construction Parameters
# IMPORTANT: Training used NORMALIZED timestamps with beta=0.5e-5, NOT microseconds!
# See training_scripts/EVGNN_AEGNN_training.ipynb line 115-117
RADIUS = 3.0  # Must match training
MAX_NUM_NEIGHBORS = 16  # Must match training
MAX_DT = 0.001  # For NORMALIZED timestamps (not microseconds)
BETA = 0.5e-5  # Time normalization factor - MATCHES TRAINING!
USE_MICROSECONDS = False  # Model was trained with normalized timestamps!

# Device Configuration
DEVICE = "cpu"

In [5]:
import gc
import json
import os
import sys
import time
from contextlib import nullcontext
from copy import deepcopy

import numpy as np
import torch
from torch_geometric.data import Data
from torch_geometric.nn.pool import radius_graph
from torch_geometric.transforms import Cartesian
from tqdm import tqdm

# Make the project importable: the repository root is taken to be the parent of
# the current working directory (the notebook is expected to run from
# testing_scripts), and its `src` folder holds the packages imported below.
# Each missing entry is pushed to the front of sys.path, root first and then
# `src`, so `src` ends up with the highest import priority.
project_root = os.path.abspath('..')
src_path = os.path.join(project_root, 'src')
for extra_path in (project_root, src_path):
    if extra_path not in sys.path:
        sys.path.insert(0, extra_path)

from Models.CleanEvGNN.recognition import RecognitionModel as EvGNN
from Models.CleanEvGNN.asyncronous import make_model_asynchronous, reset_async_module
from Models.CleanEvGNN.asyncronous_aegnn import make_model_asynchronous as make_model_asynchronous_aegnn
from Models.CleanEvGNN.asyncronous_aegnn import reset_async_module as reset_async_module_aegnn
from Datasets.ncars import NCars
from Datasets.ncaltech101 import NCaltech
from Datasets.batching import BatchManager
from Models.utils import normalize_time, sub_sampling

# Optional: Power consumption tracking (only available on Linux with AIPowerMeter)
# ModelTester is an optional dependency; record whether it could be imported so
# later cells can decide whether to wrap the evaluation in a power measurement.
try:
    from Benchmarks.ModelTester import ModelTester
except ImportError:
    POWER_TRACKING_AVAILABLE = False
    print("‚ö†Ô∏è  Power consumption tracking not available (AIPowerMeter not installed)")
else:
    POWER_TRACKING_AVAILABLE = True
    print("‚úì Power consumption tracking available")



‚ö†Ô∏è  Power consumption tracking not available (AIPowerMeter not installed)


## Load Dataset and Model

In [6]:


# Supported dataset keys and the loader class that handles each of them.
DATASET_CLASSES = {
    "ncars": NCars,
    "ncaltech": NCaltech,
}

# Validate the key BEFORE indexing DATASET_PATHS, so a typo in DATASET surfaces
# as the descriptive ValueError instead of a bare KeyError from the path lookup.
if DATASET not in DATASET_CLASSES:
    raise ValueError(f"Unknown dataset: {DATASET}")

dataset_path = DATASET_PATHS[DATASET]
dataset_cls = DATASET_CLASSES[DATASET]

dataset_obj = dataset_cls(root=dataset_path)
dataset_info = dataset_cls.get_info()
num_classes = len(dataset_info.classes)
image_size = dataset_info.image_size

# Only the test split is needed for this evaluation notebook.
dataset_obj.process(modes=["test"])
num_test_samples = dataset_obj.get_mode_length("test")

üìÇ Processing folder: gerenuk


gerenuk:   0%|          | 0/4 [00:00<?, ?it/s]


üìÇ Processing folder: hawksbill


hawksbill:   0%|          | 0/10 [00:00<?, ?it/s]


üìÇ Processing folder: headphone


Error processing line 1 of /opt/anaconda3/envs/GNNBenchmark/lib/python3.11/site-packages/distutils-precedence.pth:



headphone:   0%|          | 0/5 [00:00<?, ?it/s]


üìÇ Processing folder: ant


ant:   0%|          | 0/5 [00:00<?, ?it/s]


üìÇ Processing folder: butterfly


  Traceback (most recent call last):
    File "<frozen site>", line 195, in addpackage
    File "<string>", line 1, in <module>
  ModuleNotFoundError: No module named '_distutils_hack'

Remainder of file ignored


butterfly:   0%|          | 0/10 [00:00<?, ?it/s]


üìÇ Processing folder: lamp


lamp:   0%|          | 0/7 [00:00<?, ?it/s]


üìÇ Processing folder: strawberry


strawberry:   0%|          | 0/4 [00:00<?, ?it/s]


üìÇ Processing folder: water_lilly


water_lilly:   0%|          | 0/5 [00:00<?, ?it/s]


üìÇ Processing folder: chandelier


chandelier:   0%|          | 0/12 [00:00<?, ?it/s]


üìÇ Processing folder: dragonfly


dragonfly:   0%|          | 0/8 [00:00<?, ?it/s]


üìÇ Processing folder: crab


crab:   0%|          | 0/8 [00:00<?, ?it/s]


üìÇ Processing folder: pagoda


pagoda:   0%|          | 0/6 [00:00<?, ?it/s]


üìÇ Processing folder: dollar_bill


dollar_bill:   0%|          | 0/6 [00:00<?, ?it/s]


üìÇ Processing folder: emu


emu:   0%|          | 0/6 [00:00<?, ?it/s]


üìÇ Processing folder: inline_skate


inline_skate:   0%|          | 0/4 [00:00<?, ?it/s]


üìÇ Processing folder: platypus


platypus:   0%|          | 0/4 [00:00<?, ?it/s]


üìÇ Processing folder: dalmatian


dalmatian:   0%|          | 0/8 [00:00<?, ?it/s]


üìÇ Processing folder: cup


cup:   0%|          | 0/7 [00:00<?, ?it/s]


üìÇ Processing folder: airplanes


airplanes:   0%|          | 0/80 [00:00<?, ?it/s]


üìÇ Processing folder: joshua_tree


joshua_tree:   0%|          | 0/7 [00:00<?, ?it/s]


üìÇ Processing folder: cougar_body


cougar_body:   0%|          | 0/6 [00:00<?, ?it/s]


üìÇ Processing folder: grand_piano


grand_piano:   0%|          | 0/11 [00:00<?, ?it/s]


üìÇ Processing folder: trilobite


trilobite:   0%|          | 0/10 [00:00<?, ?it/s]


üìÇ Processing folder: brontosaurus


brontosaurus:   0%|          | 0/5 [00:00<?, ?it/s]


üìÇ Processing folder: wild_cat


wild_cat:   0%|          | 0/4 [00:00<?, ?it/s]


üìÇ Processing folder: pigeon


pigeon:   0%|          | 0/5 [00:00<?, ?it/s]


üìÇ Processing folder: dolphin


dolphin:   0%|          | 0/7 [00:00<?, ?it/s]


üìÇ Processing folder: soccer_ball


soccer_ball:   0%|          | 0/7 [00:00<?, ?it/s]


üìÇ Processing folder: wrench


wrench:   0%|          | 0/5 [00:00<?, ?it/s]


üìÇ Processing folder: scorpion


scorpion:   0%|          | 0/9 [00:00<?, ?it/s]


üìÇ Processing folder: flamingo_head


flamingo_head:   0%|          | 0/5 [00:00<?, ?it/s]


üìÇ Processing folder: nautilus


nautilus:   0%|          | 0/6 [00:00<?, ?it/s]


üìÇ Processing folder: accordion


accordion:   0%|          | 0/6 [00:00<?, ?it/s]


üìÇ Processing folder: cougar_face


cougar_face:   0%|          | 0/8 [00:00<?, ?it/s]


üìÇ Processing folder: pyramid


pyramid:   0%|          | 0/7 [00:00<?, ?it/s]


üìÇ Processing folder: camera


camera:   0%|          | 0/5 [00:00<?, ?it/s]


üìÇ Processing folder: barrel


barrel:   0%|          | 0/6 [00:00<?, ?it/s]


üìÇ Processing folder: schooner


schooner:   0%|          | 0/7 [00:00<?, ?it/s]


üìÇ Processing folder: cellphone


cellphone:   0%|          | 0/7 [00:00<?, ?it/s]


üìÇ Processing folder: panda


panda:   0%|          | 0/5 [00:00<?, ?it/s]


üìÇ Processing folder: revolver


revolver:   0%|          | 0/9 [00:00<?, ?it/s]


üìÇ Processing folder: lobster


lobster:   0%|          | 0/5 [00:00<?, ?it/s]


üìÇ Processing folder: menorah


menorah:   0%|          | 0/10 [00:00<?, ?it/s]


üìÇ Processing folder: lotus


lotus:   0%|          | 0/8 [00:00<?, ?it/s]


üìÇ Processing folder: stapler


stapler:   0%|          | 0/5 [00:00<?, ?it/s]


üìÇ Processing folder: crocodile


crocodile:   0%|          | 0/5 [00:00<?, ?it/s]


üìÇ Processing folder: chair


chair:   0%|          | 0/7 [00:00<?, ?it/s]


üìÇ Processing folder: helicopter


helicopter:   0%|          | 0/10 [00:00<?, ?it/s]


üìÇ Processing folder: minaret


minaret:   0%|          | 0/9 [00:00<?, ?it/s]


üìÇ Processing folder: starfish


starfish:   0%|          | 0/10 [00:00<?, ?it/s]


üìÇ Processing folder: ceiling_fan


ceiling_fan:   0%|          | 0/6 [00:00<?, ?it/s]


üìÇ Processing folder: ketch


ketch:   0%|          | 0/12 [00:00<?, ?it/s]


üìÇ Processing folder: mayfly


mayfly:   0%|          | 0/4 [00:00<?, ?it/s]


üìÇ Processing folder: wheelchair


wheelchair:   0%|          | 0/7 [00:00<?, ?it/s]


üìÇ Processing folder: bass


bass:   0%|          | 0/6 [00:00<?, ?it/s]


üìÇ Processing folder: yin_yang


yin_yang:   0%|          | 0/6 [00:00<?, ?it/s]


üìÇ Processing folder: crocodile_head


crocodile_head:   0%|          | 0/6 [00:00<?, ?it/s]


üìÇ Processing folder: saxophone


saxophone:   0%|          | 0/4 [00:00<?, ?it/s]


üìÇ Processing folder: beaver


beaver:   0%|          | 0/6 [00:00<?, ?it/s]


üìÇ Processing folder: mandolin


mandolin:   0%|          | 0/5 [00:00<?, ?it/s]


üìÇ Processing folder: bonsai


bonsai:   0%|          | 0/14 [00:00<?, ?it/s]


üìÇ Processing folder: Leopards


Leopards:   0%|          | 0/20 [00:00<?, ?it/s]


üìÇ Processing folder: car_side


car_side:   0%|          | 0/13 [00:00<?, ?it/s]


üìÇ Processing folder: ibis


ibis:   0%|          | 0/8 [00:00<?, ?it/s]


üìÇ Processing folder: electric_guitar


electric_guitar:   0%|          | 0/8 [00:00<?, ?it/s]


üìÇ Processing folder: kangaroo


kangaroo:   0%|          | 0/10 [00:00<?, ?it/s]


üìÇ Processing folder: stegosaurus


stegosaurus:   0%|          | 0/7 [00:00<?, ?it/s]


üìÇ Processing folder: ferry


ferry:   0%|          | 0/8 [00:00<?, ?it/s]


üìÇ Processing folder: snoopy


snoopy:   0%|          | 0/4 [00:00<?, ?it/s]


üìÇ Processing folder: umbrella


umbrella:   0%|          | 0/8 [00:00<?, ?it/s]


üìÇ Processing folder: rhino


rhino:   0%|          | 0/7 [00:00<?, ?it/s]


üìÇ Processing folder: okapi


okapi:   0%|          | 0/5 [00:00<?, ?it/s]


üìÇ Processing folder: watch


watch:   0%|          | 0/25 [00:00<?, ?it/s]


üìÇ Processing folder: brain


brain:   0%|          | 0/11 [00:00<?, ?it/s]


üìÇ Processing folder: gramophone


gramophone:   0%|          | 0/6 [00:00<?, ?it/s]


üìÇ Processing folder: scissors


scissors:   0%|          | 0/5 [00:00<?, ?it/s]


üìÇ Processing folder: rooster


rooster:   0%|          | 0/6 [00:00<?, ?it/s]


üìÇ Processing folder: cannon


cannon:   0%|          | 0/5 [00:00<?, ?it/s]


üìÇ Processing folder: binocular


binocular:   0%|          | 0/4 [00:00<?, ?it/s]


üìÇ Processing folder: anchor


anchor:   0%|          | 0/5 [00:00<?, ?it/s]


üìÇ Processing folder: octopus


octopus:   0%|          | 0/4 [00:00<?, ?it/s]


üìÇ Processing folder: buddha


buddha:   0%|          | 0/9 [00:00<?, ?it/s]


üìÇ Processing folder: laptop


laptop:   0%|          | 0/9 [00:00<?, ?it/s]


üìÇ Processing folder: windsor_chair


windsor_chair:   0%|          | 0/7 [00:00<?, ?it/s]


üìÇ Processing folder: hedgehog


hedgehog:   0%|          | 0/6 [00:00<?, ?it/s]


üìÇ Processing folder: pizza


pizza:   0%|          | 0/6 [00:00<?, ?it/s]


üìÇ Processing folder: euphonium


euphonium:   0%|          | 0/7 [00:00<?, ?it/s]


üìÇ Processing folder: stop_sign


stop_sign:   0%|          | 0/7 [00:00<?, ?it/s]


üìÇ Processing folder: Motorbikes


Motorbikes:   0%|          | 0/81 [00:00<?, ?it/s]


üìÇ Processing folder: sea_horse


sea_horse:   0%|          | 0/7 [00:00<?, ?it/s]


üìÇ Processing folder: flamingo


flamingo:   0%|          | 0/8 [00:00<?, ?it/s]


üìÇ Processing folder: ewer


ewer:   0%|          | 0/9 [00:00<?, ?it/s]


üìÇ Processing folder: garfield


garfield:   0%|          | 0/4 [00:00<?, ?it/s]


üìÇ Processing folder: crayfish


crayfish:   0%|          | 0/7 [00:00<?, ?it/s]


üìÇ Processing folder: Faces_easy


Faces_easy:   0%|          | 0/44 [00:00<?, ?it/s]


üìÇ Processing folder: sunflower


sunflower:   0%|          | 0/9 [00:00<?, ?it/s]


üìÇ Processing folder: llama


llama:   0%|          | 0/9 [00:00<?, ?it/s]


üìÇ Processing folder: elephant


elephant:   0%|          | 0/7 [00:00<?, ?it/s]


üìÇ Processing folder: tick


tick:   0%|          | 0/6 [00:00<?, ?it/s]


üìÇ Processing folder: metronome


metronome:   0%|          | 0/4 [00:00<?, ?it/s]

In [7]:
# Build the recognition network on the configured device and restore the
# trained weights from MODEL_PATH/MODEL_NAME.
device = torch.device(DEVICE)
# The two entries of image_size are swapped for the model
# (presumably (width, height) -> (height, width); confirm against the dataset info).
img_shape = (image_size[1], image_size[0])

model_kwargs = dict(
    network="graph_res",
    dataset=DATASET,
    num_classes=num_classes,
    img_shape=img_shape,
    dim=3,
    conv_type=CONV_TYPE,
    distill=False,
)
model = EvGNN(**model_kwargs).to(device)

model_path = os.path.join(MODEL_PATH, MODEL_NAME)
if not os.path.exists(model_path):
    raise FileNotFoundError(f"Model not found: {model_path}")

# NOTE(review): torch.load unpickles the file; only load checkpoints from a trusted source.
state_dict = torch.load(model_path, map_location=device)
model.load_state_dict(state_dict)
# Left as the final expression so the notebook displays the model summary.
model.eval()

RecognitionModel(
  (criterion): CrossEntropyLoss()
  (model): GraphRes(
    (fuse1): MyConvBNReLU(1, 16)
    (fuse2): MyConvBNReLU(16, 32)
    (fuse3): MyConvBNReLU(32, 32)
    (fuse4): MyConvBNReLU(32, 32)
    (pool): MaxPoolingX(voxel_size=tensor([16., 16.]), size=180)
    (fc): qLinear(
      (lin): Linear(in_features=5760, out_features=101, bias=False)
      (obs_in): MinMaxObserver(min_val=0.0, max_val=5.838122844696045)
      (obs_out): MinMaxObserver(min_val=-26.6807918548584, max_val=8.962950706481934)
      (obs_w): MinMaxObserver(min_val=-0.07638765871524811, max_val=0.04301139712333679)
    )
    (drop): Dropout(p=0.0, inplace=False)
  )
)

In [8]:
def transform_sample(sample, device):
    """Preprocess one raw sample into a graph on ``device``.

    Intended to mirror the pre_transform of the original AEGNN dataset
    (per the notebook author; not verifiable from this cell alone).

    Steps, in order:
      1. move the sample to ``device``;
      2. replace feature values of -1 with 0 in ``sample.x``;
      3. subsample via ``sub_sampling`` with ``N_EVENTS_SAMPLE``;
      4. rescale the third position column (time): rounded ``* 1e6`` when
         ``USE_MICROSECONDS`` is set, otherwise ``normalize_time`` with ``BETA``;
      5. build ``edge_index`` with ``radius_graph`` and Cartesian ``edge_attr``.

    Args:
        sample: graph data object exposing ``x`` and ``pos`` and supporting ``.to()``.
        device: target torch device.

    Returns:
        The preprocessed sample with ``edge_index`` and ``edge_attr`` populated.
    """
    sample = sample.to(device)

    # Polarity: map -1 to 0, leave every other value untouched.
    polarity = sample.x
    sample.x = torch.where(polarity == -1., 0., polarity)

    sample = sub_sampling(sample, n_samples=N_EVENTS_SAMPLE, sub_sample=True)

    # Rescale the time column in place.
    timestamps = sample.pos[:, 2]
    if USE_MICROSECONDS:
        rescaled = torch.round(timestamps * 1e6)
    else:
        rescaled = normalize_time(timestamps, beta=BETA)
    sample.pos[:, 2] = rescaled

    # Connectivity and edge features.
    sample.edge_index = radius_graph(sample.pos, r=RADIUS, max_num_neighbors=MAX_NUM_NEIGHBORS)
    sample.edge_attr = Cartesian(cat=False, max_value=RADIUS)(sample).edge_attr

    return sample

## Set Up Async Model and Test Loader

In [9]:

# Test-split loader that yields one sample per step, and the number of samples
# to evaluate (never more than the test split contains).
test_loader = BatchManager(dataset=dataset_obj, batch_size=1, mode="test")
num_samples = min(NUM_SAMPLES, num_test_samples)

edge_attributes = Cartesian(norm=True, cat=False)

# Convert a copy so the synchronous `model` stays untouched.
model_for_async = deepcopy(model)

# Arguments accepted by both asynchronous conversion routines.
shared_async_kwargs = dict(
    r=RADIUS,
    edge_attributes=edge_attributes,
    log_flops=False,
    log_runtime=False,
)

if CONV_TYPE == 'ori_aegnn':
    async_model = make_model_asynchronous_aegnn(model_for_async, **shared_async_kwargs)
    reset_async_fn = reset_async_module_aegnn
else:
    # This variant additionally takes the neighbour and time-window limits.
    async_model = make_model_asynchronous(
        model_for_async,
        max_num_neighbors=MAX_NUM_NEIGHBORS,
        max_dt=MAX_DT,
        **shared_async_kwargs,
    )
    reset_async_fn = reset_async_module



## Run Full Async Evaluation

In [10]:

# Initialize power consumption tracking (only when ModelTester imported and we are on Linux)
model_tester = None
if POWER_TRACKING_AVAILABLE and sys.platform == "linux":
    power_output_dir = os.path.join(OUTPUT_DIR, "power_consumption")
    os.makedirs(power_output_dir, exist_ok=True)
    model_tester = ModelTester(
        results_path=power_output_dir,
        model=async_model
    )
    print("‚úì Power consumption tracking initialized")
elif POWER_TRACKING_AVAILABLE:
    print("‚ö†Ô∏è  Power tracking only works on Linux, skipping measurement")

# Result accumulators. all_targets, all_predictions and predictions_per_event
# are kept index-aligned: entry i of each list belongs to the same successfully
# evaluated sample. Failed samples are counted but contribute no entries.
per_event_latencies = []
all_predictions = []
all_targets = []
predictions_per_event = []
successful_samples = 0
failed_samples = 0

gc.collect()
if device.type == 'cuda':
    torch.cuda.empty_cache()

events_to_process = min(EVENTS_PER_SAMPLE, N_EVENTS_SAMPLE)


def _stream_sample(sample):
    """Feed one preprocessed sample to the async model, one event at a time.

    Resets the asynchronous model state, then pushes the first
    ``min(sample.num_nodes, events_to_process)`` events through ``async_model``
    as single-node graphs without edges.

    Returns:
        (predictions, latencies_ms): the argmax prediction after each event and
        the wall-clock time of each forward call in milliseconds.
    """
    reset_async_fn(async_model)

    num_events = min(sample.num_nodes, events_to_process)
    event_predictions = []
    event_latencies = []
    on_cuda = device.type == 'cuda'

    with torch.no_grad():
        for event_idx in range(num_events):
            event_new = Data(
                x=sample.x[event_idx:event_idx+1],
                pos=sample.pos[event_idx:event_idx+1, :3],
                batch=torch.zeros(1, dtype=torch.long),
                edge_index=torch.empty((2, 0), dtype=torch.long),
                edge_attr=torch.empty((0, 3), dtype=torch.float)
            ).to(device)

            # CUDA kernels run asynchronously; synchronize around the timed
            # region so the measurement covers the actual computation.
            if on_cuda:
                torch.cuda.synchronize()
            event_start = time.perf_counter()
            output = async_model(event_new)
            if on_cuda:
                torch.cuda.synchronize()
            event_latencies.append((time.perf_counter() - event_start) * 1000)

            event_predictions.append(torch.argmax(output, dim=-1).item())

    return event_predictions, event_latencies


# Evaluation loop. When power tracking is active the whole loop runs inside the
# ModelTester context; otherwise a no-op context keeps a single code path.
evaluation_context = model_tester if model_tester is not None else nullcontext()

with evaluation_context:
    for _ in tqdm(range(num_samples), desc="Async inference"):
        try:
            sample = transform_sample(next(test_loader), device)
            target_class = sample.y.item()
            sample_predictions, sample_latencies = _stream_sample(sample)
            if not sample_predictions:
                # No event was streamed, so there is no final prediction to score.
                raise RuntimeError("sample contains no events to stream")
        except (IndexError, RuntimeError) as e:
            # Record the failure and report it instead of padding the result
            # lists with fake predictions, which would skew the accuracy and
            # could misalign predictions with targets.
            failed_samples += 1
            tqdm.write(f"Skipping failed sample ({type(e).__name__}): {e}")
            continue

        # Commit results only once the whole sample succeeded.
        all_targets.append(target_class)
        all_predictions.append(sample_predictions[-1])  # prediction after the last event
        predictions_per_event.append(sample_predictions)
        per_event_latencies.extend(sample_latencies)
        successful_samples += 1


Async inference: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 50/50 [04:24<00:00,  5.29s/it]


## Calculate Metrics


In [13]:

# Compute metrics
predictions = np.array(all_predictions)
targets = np.array(all_targets)

# The result lists must be index-aligned; fail with a clear message otherwise
# instead of letting NumPy compare arrays of different lengths.
if predictions.shape != targets.shape or len(predictions_per_event) != len(all_targets):
    raise ValueError(
        "Misaligned evaluation results: "
        f"{len(all_predictions)} predictions, {len(all_targets)} targets, "
        f"{len(predictions_per_event)} per-event prediction lists"
    )

correct_mask = predictions == targets
# With no evaluated samples report 0.0 rather than NaN (NaN is not valid JSON).
accuracy_final = float(correct_mask.mean()) if correct_mask.size else 0.0

# Compute latency statistics (0.0 when nothing was measured)
latency_mean = float(np.mean(per_event_latencies)) if per_event_latencies else 0.0
latency_std = float(np.std(per_event_latencies)) if per_event_latencies else 0.0

# Compute accuracy evolution over events: for every event index, the fraction of
# samples (among those that reached that index) whose prediction was correct.
# Counts are accumulated in a single pass over the samples.
max_events = max((len(preds) for preds in predictions_per_event), default=0)
correct_per_event = np.zeros(max_events, dtype=np.int64)
total_per_event = np.zeros(max_events, dtype=np.int64)
for sample_preds, sample_target in zip(predictions_per_event, all_targets):
    n_preds = len(sample_preds)
    total_per_event[:n_preds] += 1
    correct_per_event[:n_preds] += np.asarray(sample_preds) == sample_target
accuracy_evolution = [
    float(n_correct) / float(n_total) if n_total > 0 else 0.0
    for n_correct, n_total in zip(correct_per_event, total_per_event)
]

# Count correct/incorrect predictions
num_correct = int(correct_mask.sum())
num_incorrect = successful_samples - num_correct

# Print results
print("\n" + "="*70)
print("ASYNCHRONOUS EVALUATION RESULTS")
print("="*70)
print(f"Accuracy: {accuracy_final:.4f} ({num_correct}/{successful_samples} correct)")
print(f"Latency:  {latency_mean:.4f} ¬± {latency_std:.4f} ms")
print(f"Samples:  {successful_samples}/{num_samples} successful")
print("="*70)

# Power consumption summary (only when a ModelTester wrapped the evaluation)
power_summary = None
if model_tester is not None:
    print("\n" + "="*70)
    print("POWER CONSUMPTION")
    print("="*70)
    try:
        # NOTE(review): relies on a private ModelTester method; prefer a public API if one exists.
        if model_tester._power_consumption_results_exist():
            model_tester.print_power_consumption()
            power_summary = model_tester.summarize_power_consumption()
            print("‚úì Power consumption data collected")
        else:
            print("‚ö†Ô∏è  No power consumption data found")
    except Exception as e:
        # Best effort: a failed power read-out must not discard the accuracy results.
        print(f"‚ö†Ô∏è  Could not retrieve power consumption: {e}")
    print("="*70)

# Save accuracy evolution to CSV
os.makedirs(OUTPUT_DIR, exist_ok=True)
model_name = os.path.splitext(MODEL_NAME)[0]
base_name = f"{model_name}_{DATASET}"

csv_path = os.path.join(OUTPUT_DIR, f"{base_name}_accuracy_evolution.csv")
with open(csv_path, 'w') as f:
    f.write("event_index,accuracy\n")
    for idx, acc in enumerate(accuracy_evolution):
        f.write(f"{idx},{acc:.6f}\n")

# Save metrics to JSON
json_path = os.path.join(OUTPUT_DIR, f"{base_name}_metrics.json")
all_metrics = {
    'model': MODEL_NAME,
    'dataset': DATASET,
    'accuracy': float(accuracy_final),
    'latency_mean_ms': latency_mean,
    'latency_std_ms': latency_std,
    'num_correct': num_correct,
    'num_incorrect': num_incorrect,
    'successful_samples': successful_samples,
    'failed_samples': failed_samples
}

# Add power consumption metrics if available
if power_summary is not None:
    all_metrics['power_consumption'] = power_summary

with open(json_path, 'w') as f:
    json.dump(all_metrics, f, indent=2)



ASYNCHRONOUS EVALUATION RESULTS
Accuracy: 0.3200 (16/50 correct)
Latency:  1.0255 ¬± 0.5143 ms
Samples:  50/50 successful


In [14]:
# # Analyze predictions - check if model predicted varied classes
# print("\n" + "="*70)
# print("PREDICTION ANALYSIS")
# print("="*70)
#
# # Count unique predictions
# unique_preds, pred_counts = np.unique(predictions, return_counts=True)
# print(f"\nTotal predictions: {len(predictions)}")
# print(f"Unique classes predicted: {len(unique_preds)}/{num_classes}")
# print(f"\nPrediction distribution:")
# for cls, count in zip(unique_preds, pred_counts):
#     percentage = (count / len(predictions)) * 100
#     print(f"  Class {int(cls)}: {count:3d} times ({percentage:5.1f}%)")
#
# # Check if model is stuck predicting one class
# if len(unique_preds) == 1:
#     print(f"\n‚ùå WARNING: Model predicted ONLY class {int(unique_preds[0])}!")
#     print(f"   The model is stuck and not working properly.")
# elif len(unique_preds) < num_classes / 2:
#     print(f"\n‚ö†Ô∏è  Model only uses {len(unique_preds)} out of {num_classes} classes")
#     print(f"   This may indicate a problem with the model or data.")
# else:
#     print(f"\n‚úì Model uses {len(unique_preds)} different classes (good diversity)")
#
# # Show ground truth distribution for comparison
# print(f"\nGround truth distribution:")
# unique_targets, target_counts = np.unique(targets, return_counts=True)
# for cls, count in zip(unique_targets, target_counts):
#     percentage = (count / len(targets)) * 100
#     print(f"  Class {int(cls)}: {count:3d} times ({percentage:5.1f}%)")
#
# # Per-class accuracy
# print(f"\nPer-class accuracy:")
# for cls in range(num_classes):
#     cls_mask = targets == cls
#     if cls_mask.sum() > 0:
#         cls_acc = (predictions[cls_mask] == targets[cls_mask]).mean()
#         cls_count = cls_mask.sum()
#         correct = int(cls_acc * cls_count)
#         print(f"  Class {cls}: {cls_acc:.4f} ({correct}/{cls_count} correct)")
#
# print("="*70)



PREDICTION ANALYSIS

Total predictions: 50
Unique classes predicted: 21/101

Prediction distribution:
  Class 0:   3 times (  6.0%)
  Class 2:   8 times ( 16.0%)
  Class 4:   3 times (  6.0%)
  Class 8:   3 times (  6.0%)
  Class 12:   4 times (  8.0%)
  Class 15:   5 times ( 10.0%)
  Class 16:   1 times (  2.0%)
  Class 19:   1 times (  2.0%)
  Class 23:   2 times (  4.0%)
  Class 29:   5 times ( 10.0%)
  Class 32:   1 times (  2.0%)
  Class 36:   1 times (  2.0%)
  Class 39:   1 times (  2.0%)
  Class 46:   5 times ( 10.0%)
  Class 53:   1 times (  2.0%)
  Class 57:   1 times (  2.0%)
  Class 66:   1 times (  2.0%)
  Class 76:   1 times (  2.0%)
  Class 87:   1 times (  2.0%)
  Class 94:   1 times (  2.0%)
  Class 99:   1 times (  2.0%)

‚ö†Ô∏è  Model only uses 21 out of 101 classes
   This may indicate a problem with the model or data.

Ground truth distribution:
  Class 0:   3 times (  6.0%)
  Class 2:   4 times (  8.0%)
  Class 3:   1 times (  2.0%)
  Class 4:   2 times (  4.0%)
