# 📘 Edge AI Model Dataset Builder
This notebook benchmarks multiple AI models (image, audio, NLP) on your device, builds a latency/memory dataset, trains a surrogate model, and selects the optimal model.

In [None]:
!pip install psutil pynvml onnxruntime torch torchvision numpy pandas scikit-learn transformers requests

## 🧩 Step 1: Probe Device

In [None]:
import json

from edge_selector.probe import DeviceProbe

# Probe the current device and persist the result to device_profile.json,
# which the model-selection step reloads as its context.
profile = DeviceProbe().run()

# The context manager guarantees the file is flushed and closed before any
# later cell reads it back.
with open('device_profile.json', 'w') as fh:
    json.dump(profile, fh, indent=2)
print('✅ Device profile saved!')

## ⚙️ Step 2: Benchmark All Registered Models

In [None]:
!python scripts/run_all_models.py

## 📊 Step 3: Inspect Dataset

In [None]:
import json

import pandas as pd

# Load the benchmark records: runs.ndjson holds one JSON object per line.
with open('runs.ndjson') as fh:
    # Skip blank lines (e.g. a stray empty line at the end of the file),
    # which json.loads would reject with a JSONDecodeError.
    rows = [json.loads(line) for line in fh if line.strip()]

# `df` is reused by the selection and visualization steps below.
df = pd.DataFrame(rows)

# Preview the key columns, then show summary statistics as the cell output.
display(df[['meta','framework','latency_ms','mem_mb']].head())
df.describe()

## 🧠 Step 4: Train Surrogate Model

In [None]:
from edge_selector.surrogate import Surrogate

# Train the surrogate on the benchmark records file.
# `s` is kept as the variable name because the selection step passes it to
# Selector.
s = Surrogate()
s.train('runs.ndjson')
print('✅ Surrogate trained!')

## 🧮 Step 5: Select Best Model Based on SLA

In [None]:
import json

from edge_selector.selector import Selector

# Attributes applied uniformly to every candidate action. They are
# hard-coded rather than measured per model.
# NOTE(review): presumably placeholders — confirm what Selector expects.
DEFAULT_PARAMS = 1e6
DEFAULT_BACKEND = 'torch'
DEFAULT_EST_QUALITY = 0.85

# Reload the device profile saved in Step 1; `with` closes the handle promptly.
with open('device_profile.json') as fh:
    context = json.load(fh)

# One candidate action per distinct model name found in the benchmark data
# (`df` comes from Step 3).
candidate_models = df['meta'].apply(lambda x: x['model_name']).unique()
actions = [
    {
        'model': m,
        'params': DEFAULT_PARAMS,
        'backend': DEFAULT_BACKEND,
        'est_quality': DEFAULT_EST_QUALITY,
    }
    for m in candidate_models
]

# `s` is the Surrogate instance created in Step 4.
sel = Selector(s)
print(sel.select(context, actions))

## 📈 Step 6: Visualize Model Trade-offs

In [None]:
import matplotlib.pyplot as plt

# Pull out the plotted columns once; `df` comes from the dataset-inspection step.
latencies = df['latency_ms']
memories = df['mem_mb']
labels = df['meta'].apply(lambda x: x['model_name'])

# One point per benchmark row: latency on the x-axis, memory on the y-axis.
plt.scatter(latencies, memories)
plt.title('Model Latency vs Memory Usage')
plt.xlabel('Latency (ms)')
plt.ylabel('Memory (MB)')

# Annotate each point with its model name, shifted 1 unit right along the
# x-axis so the text does not sit on top of the marker.
for x_pos, y_pos, label in zip(latencies, memories, labels):
    plt.text(x_pos + 1, y_pos, label, fontsize=8)

plt.show()