In [None]:
# Base setup: Colab secrets, repo clone, env vars
import os, pathlib, subprocess


def read_secret(store, name):
    """Fetch one secret from `store`, returning None when it cannot be read.

    Each lookup is isolated so that a single missing or inaccessible secret
    does not abort the rest of the setup (the Colab secrets API can raise
    instead of returning None).

    Args:
        store: object exposing ``get(name)`` (here: ``google.colab.userdata``).
        name: secret name to look up.

    Returns:
        The secret value, or None if ``store.get`` raised.
    """
    try:
        return store.get(name)
    except Exception as exc:  # best-effort: report the failure type and carry on
        print(f'[Setup] secret {name!r} unavailable ({type(exc).__name__})')
        return None


try:
    from google.colab import userdata  # type: ignore
except ImportError:
    userdata = None  # not running on Colab: nothing to configure

if userdata is not None:
    GH_TOKEN = read_secret(userdata, 'GH_TOKEN')
    OPENAI_API_KEY = read_secret(userdata, 'OPENAI_API_KEY')
    GOOGLE_API_KEY = read_secret(userdata, 'GOOGLE_API_KEY')
    AGNITRAAI_REPO = read_secret(userdata, 'AGNITRAAI_REPO')
    # Export only the API keys that are actually set.
    if OPENAI_API_KEY: os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
    if GOOGLE_API_KEY: os.environ['GOOGLE_API_KEY'] = GOOGLE_API_KEY

    repo_path = pathlib.Path('/content/agnitraai')
    if not repo_path.exists() and pathlib.Path('/content').exists() and GH_TOKEN and AGNITRAAI_REPO:
        repo_url = f'https://{GH_TOKEN}@{AGNITRAAI_REPO}'
        try:
            # Argument list (no shell) so the token/URL is never shell-interpreted.
            clone = subprocess.run(
                ['git', 'clone', repo_url, str(repo_path)],
                capture_output=True, text=True,
            )
        except OSError as exc:
            print('[Setup] could not run git:', exc)
        else:
            # git may echo the remote URL; redact the token before showing any output.
            clone_log = (clone.stdout + clone.stderr).replace(GH_TOKEN, '***').strip()
            if clone_log:
                print(clone_log)
            if clone.returncode != 0:
                print(f'[Setup] git clone failed (exit {clone.returncode})')

    # Only enter the repo when it is really there (pre-existing or freshly cloned).
    if repo_path.exists():
        os.chdir(repo_path)
    else:
        print('[Setup] repo not available at', repo_path)


In [None]:
# Install Agnitra locally so `agnitra` CLI is available in Colab
import pathlib, os, subprocess, sys
repo_path = pathlib.Path('/content/agnitraai')
# Editable-install the Colab clone when present, otherwise the current directory.
install_target = str(repo_path) if repo_path.exists() else '.'
# Run pip through the kernel's own interpreter so the package is installed
# into the environment this notebook actually runs in.
pip_run = subprocess.run(
    [sys.executable, '-m', 'pip', 'install', '-q', '-e', install_target],
    capture_output=True, text=True,
)
if pip_run.returncode != 0:
    print(f'[Setup] pip install failed (exit {pip_run.returncode}):')
    print((pip_run.stdout + pip_run.stderr).strip())


def _help_succeeds(argv):
    """Return True when `argv` runs and exits 0; its output is discarded."""
    try:
        probe = subprocess.run(argv, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    except OSError:
        # Executable not found / not runnable counts as "not available".
        return False
    return probe.returncode == 0


# Quick sanity: show help (fallback to module if console script not found)
cli_ok = _help_succeeds(['agnitra', '--help']) or _help_succeeds(
    [sys.executable, '-m', 'agnitraai.cli.main', '--help']
)
# Report the real outcome instead of claiming success unconditionally.
if cli_ok:
    print('Agnitra CLI available.')
else:
    print('[Setup] Agnitra CLI check failed: neither `agnitra` nor the module entry point ran.')


# Agnitra CLI Demo — TinyLlama

This notebook demonstrates a clean, CLI-first workflow:
- Prepare a tiny TinyLlama-like model
- Run profiling via `agnitra profile ...` (CPU/GPU auto-detected)
- Load telemetry artifacts and visualize key results


In [None]:
# Environment probe: report the accelerator, then make sure the CLI is installed
import shutil, subprocess, sys, json

# Torch is optional here; any failure is reported and the cell carries on.
try:
    import torch
    has_cuda = torch.cuda.is_available()
    print(f'[Env] CUDA available: {has_cuda}')
    if not has_cuda:
        print('[Env] CPU-only runtime')
    else:
        print('[Env] GPU:', torch.cuda.get_device_name(0))
except Exception as err:
    print('[Env] Torch check skipped:', err)

# Install the local package in editable mode only when no `agnitra`
# executable is on PATH; a failing pip run raises CalledProcessError.
cli_path = shutil.which('agnitra')
if cli_path is not None:
    print('[Setup] agnitra CLI found')
else:
    print('[Setup] Installing local CLI ...')
    pip_argv = [sys.executable, '-m', 'pip', 'install', '-q', '-e', 'agnitraai']
    subprocess.check_call(pip_argv)


## Prepare tiny model
Generates `tinyllama.pt` (TorchScript) and a small info JSON.

In [None]:
import subprocess, sys
# Run the preparation script with the kernel's interpreter.
# The command is echoed first; a non-zero exit raises CalledProcessError.
cmd = [sys.executable, 'prepare_tinyllama.py']
print('$', *cmd)
subprocess.check_call(cmd)


## Profile using the Agnitra CLI
Runs the CLI and prints only the CLI's stdout for a clean view.

In [None]:
import os, shlex, subprocess, shutil, sys
# Child-process environment: copy ours and set the two log-related variables
# (intended to keep framework warnings out of the CLI output).
env = os.environ.copy()
env['TF_CPP_MIN_LOG_LEVEL'] = '3'
env['GYM_DISABLE_WARNINGS'] = '1'

# Build the command as an argument list so an interpreter path containing
# spaces or backslashes is passed through intact (no string re-splitting).
profile_args = ['profile', 'tinyllama.pt', '--input-shape', '1,16,64', '--output', 'telemetry.json']
if shutil.which('agnitra') is None:
    # Console script missing: fall back to running the CLI module directly.
    launcher = [sys.executable, '-m', 'cli.main']
else:
    launcher = ['agnitra']
profile_argv = launcher + profile_args

# Shell-quoted rendering, for display only.
base_cmd = ' '.join(shlex.quote(arg) for arg in profile_argv)
print('$', base_cmd)
try:
    res = subprocess.run(profile_argv, env=env, check=True, capture_output=True, text=True)
except subprocess.CalledProcessError as exc:
    # stderr is hidden on success for a clean view, but on failure it is the
    # only diagnostic available, so show everything before re-raising.
    if exc.stdout:
        print(exc.stdout)
    print(f'[Error] CLI exited with status {exc.returncode}; stderr follows:')
    print(exc.stderr or '(empty)')
    raise
print(res.stdout)  # keep CLI output clean (stderr suppressed)


## Load telemetry and summarize

In [None]:
from pathlib import Path
import json
root = Path('agnitraai')/ 'context'


def _load_json(path, default=None):
    """Parse the JSON file at `path`.

    Args:
        path: file to read (decoded as UTF-8).
        default: value returned when the file is absent; when None, a missing
            file is treated as an error and FileNotFoundError propagates.

    Returns:
        The decoded JSON document, or `default` for a missing optional file.
    """
    path = Path(path)
    if default is not None and not path.exists():
        print('[Load] optional artifact missing:', path)
        return default
    return json.loads(path.read_text(encoding='utf-8'))


# telemetry.json is the profiler's primary output: a missing file should fail loudly.
telemetry = _load_json('telemetry.json')
# The training telemetry and the profile summary are treated as optional and
# fall back to empty structures so the summary below can still be produced.
train = _load_json('telemetry_train.json', default={'events': []})
summary = _load_json(root / 'profile_result_tinyllama.json', default={})

def to_ms_ns(v):
    """Convert a value to float and divide it by 1e6 (nanoseconds -> milliseconds, per the name).

    Args:
        v: anything ``float()`` accepts (number or numeric string).

    Returns:
        ``float(v) / 1e6``, or ``0.0`` when `v` cannot be converted
        (e.g. ``None``, a non-numeric string, or an int too large for float).
    """
    try:
        return float(v) / 1e6
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures map to 0.0; KeyboardInterrupt and
        # SystemExit are no longer swallowed as they were by a bare except.
        return 0.0

# Headline numbers: per-field totals over the inference events, plus event counts.
inference_events = telemetry.get('events', [])
training_events = train.get('events', [])


def _sum_ms(events, field):
    """Add up `field` across `events`, converting each value with to_ms_ns."""
    return sum(to_ms_ns(evt.get(field, 0.0)) for evt in events)


total_cpu_ms = _sum_ms(inference_events, 'cpu_time_total')
total_cuda_ms = _sum_ms(inference_events, 'cuda_time_total')

print('[Summary] device:', summary.get('device'))
print('[Summary] events:', len(inference_events))
print('[Summary] CPU total ms (inference):', round(total_cpu_ms, 3))
print('[Summary] CUDA total ms (inference):', round(total_cuda_ms, 3))
print('[Summary] training events:', len(training_events))


## Visualize top events

In [None]:
import matplotlib.pyplot as plt
from collections import defaultdict
ev = telemetry.get('events', [])
def top(evts, key, n=10):
    """Return the `n` events with the largest `key` value, largest first.

    Values are coerced with to_ms_ns (defined in the telemetry-loading cell),
    so events whose value is missing, None or non-numeric sort as 0 instead
    of raising a TypeError during comparison.
    """
    return sorted(evts, key=lambda e: to_ms_ns(e.get(key, 0.0)), reverse=True)[:n]
# This chart always ranks by CPU time, which is populated on CPU-only runs too.
top_cpu = top(ev, 'cpu_time_total')
names = [e.get('name','') for e in top_cpu]
# Same tolerant conversion as the summary cell, so both agree on bad values.
vals = [to_ms_ns(e.get('cpu_time_total',0.0)) for e in top_cpu]
if not top_cpu:
    print('[Plot] no events to plot')
else:
    fig, ax = plt.subplots(figsize=(10,4))
    # Numeric bar positions keep one bar per event even when several events
    # share a name; string categories would stack them on the same row.
    positions = list(range(len(top_cpu)))
    ax.barh(positions, vals[::-1])  # reversed so the largest bar ends up on top
    ax.set_yticks(positions)
    ax.set_yticklabels(names[::-1])
    ax.set_xlabel('CPU time (ms)')
    ax.set_title('Top ops by CPU time (inference)')
    fig.tight_layout(); plt.show()


## Artifact paths

In [None]:
from pathlib import Path
# Output files to check for, relative to the current working directory.
out = [
    'telemetry.json',
    'telemetry_train.json',
    'agnitraai/context/layer_log_tinyllama.json',
    'agnitraai/context/profile_result_tinyllama.json',
]
# Report, one line per path, whether the file is present on disk.
for artifact in out:
    present = Path(artifact).exists()
    print(f'[Artifact] {artifact} exists: {present}')
