In [1]:
# Enhanced configuration and ground truth loading / output saving setup
import json
import os

from utils import ensure_dir

# Model / generation settings used by the generation cell.
MODEL_KEY = 'qwen25'
MAX_NEW_TOKENS = 128
EXTRA_PROMPT = ''

# Output location for saved summaries and the optional ground-truth file.
OUTPUT_DIR = 'outputs'
GROUND_TRUTH_PATH = 'example_edge_samples/edge_samples_ground_truth.json'

# presumably creates OUTPUT_DIR if it does not exist; verify against utils.ensure_dir
ensure_dir(OUTPUT_DIR)

# Default to "no ground truth"; metric cells skip accuracy when this list is empty.
GROUND_TRUTH = []

# Load the ground-truth JSON when it is present, otherwise fall back to an empty mapping.
# The context manager closes the file handle as soon as the JSON has been read.
# NOTE(review): gt_map is not referenced by the other cells shown here; its schema
# is not visible, so it is not converted into GROUND_TRUTH automatically.
if os.path.isfile(GROUND_TRUTH_PATH):
    with open(GROUND_TRUTH_PATH, 'r', encoding='utf-8') as gt_file:
        gt_map = json.load(gt_file)
else:
    gt_map = {}


# Experiment 1: Single-View Baseline Action Prediction

Goal: Evaluate baseline performance of a VLM when given ONLY the front camera image for each frame. We request a JSON action + rationale.
Metrics: action accuracy (when ground truth is available), JSON validity (fraction of responses whose parsed output contains an `action` key), and average generation latency per frame.

In [2]:
# Configuration
# Expected action labels compared against the model's predictions
# (presumably one entry per frame, in frame order -- confirm against the data).
GROUND_TRUTH = [  # example; adjust or load
    'LEFT', 'STOP', 'SLOW',
    'LEFT', 'STOP', 'SLOW',
]
EXTRA_PROMPT = ''  # optional additional constraint
MAX_NEW_TOKENS = 128  # generation length cap passed to the model call
MODEL_KEY = 'qwen25'  # choose from utils.VLM_MODELS keys

In [3]:
# Standard library
import json
import os
import time

# Imaging library and project helpers
from PIL import Image
from utils import (
    action_accuracy,
    build_single_view_prompt,
    generate_action,
    list_frame_pairs,
    parse_action_json,
)

# Collect the frame path pairs from the sample folder; the preview output shows
# each entry as a (camera image path, map image path) tuple.
pairs = list_frame_pairs(folder_name='example_edge_samples')
pairs[:2]  # preview

[('/home/o0i3z3/thesis/vlm_laboratories/vlm_laboratories/vlm_decision_lab/example_edge_samples/frame01_cam.png',
  '/home/o0i3z3/thesis/vlm_laboratories/vlm_laboratories/vlm_decision_lab/example_edge_samples/frame01_map.png'),
 ('/home/o0i3z3/thesis/vlm_laboratories/vlm_laboratories/vlm_decision_lab/example_edge_samples/frame02_cam.png',
  '/home/o0i3z3/thesis/vlm_laboratories/vlm_laboratories/vlm_decision_lab/example_edge_samples/frame02_map.png')]

In [4]:
# Run the model once per frame using only the camera image, recording the raw
# output, the parsed action dict, and the time spent in the generation call.
prompt = build_single_view_prompt(EXTRA_PROMPT)
results = []    # raw model output per frame
parsed = []     # parsed result per frame ({} when the parser returns a falsy value)
latencies = []  # seconds spent inside generate_action per frame
for cam_path, map_path in pairs:  # map_path is unused: this experiment is single-view
    # Open inside a context manager so the file handle is released deterministically;
    # convert() returns a new image that stays valid after the file is closed.
    with Image.open(cam_path) as cam_file:
        img = cam_file.convert('RGB')
    # perf_counter() is monotonic, so the interval is unaffected by system clock changes.
    # NOTE(review): the "Loading checkpoint shards" output suggests the first call may
    # include model loading time -- confirm whether it should count toward latency.
    start = time.perf_counter()
    out = generate_action(MODEL_KEY, [img], prompt, max_new_tokens=MAX_NEW_TOKENS)
    latencies.append(time.perf_counter() - start)
    results.append(out)
    pj = parse_action_json(out) or {}
    parsed.append(pj)
parsed



Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

[{'action': 'FORWARD',
  'rationale': 'The road is clear ahead, and there are no obstacles or signs indicating a need to change direction.'},
 {'action': 'FORWARD',
  'rationale': 'The robot is on a straight road with no obstacles or traffic, so it should continue forward.'},
 {'action': 'LEFT',
  'rationale': 'The road curves to the left, indicating the need to turn left.'},
 {'action': 'FORWARD',
  'rationale': 'The road is clear ahead, and there are no obstacles or signs indicating a need to change direction.'},
 {'action': 'FORWARD',
  'rationale': 'The road is clear ahead, and there are no obstacles or traffic signals indicating otherwise.'},
 {'action': 'STOP',
  'rationale': 'There is an obstacle (a rubber duck) on the road, which could cause a collision.'}]

In [5]:
# Metrics
# Predicted action per frame; '?' marks frames whose parsed output has no 'action' key.
actions = [p.get('action','?') for p in parsed]
# Accuracy is only computed when ground-truth labels were provided.
accuracy = action_accuracy(parsed, GROUND_TRUTH) if GROUND_TRUTH else None
# Fraction of frames whose parsed output contains an 'action' key.
validity = sum(1 for p in parsed if 'action' in p)/len(parsed) if parsed else 0
# Guard the average so an empty run reports None instead of raising ZeroDivisionError
# (same convention as the summary-saving cell).
avg_latency_s = sum(latencies)/len(latencies) if latencies else None
print({'accuracy': accuracy, 'json_validity': validity, 'avg_latency_s': avg_latency_s})
actions

{'accuracy': 0.0, 'json_validity': 1.0, 'avg_latency_s': 36.94840463002523}


['FORWARD', 'FORWARD', 'LEFT', 'FORWARD', 'FORWARD', 'STOP']

In [None]:
# Save metrics summary at end
import json, os
from utils import action_accuracy, parse_action_json, save_text
# This cell reads parsed and latencies from the generation cell; check for both so a
# fresh kernel gets the hint below instead of a NameError.
if 'parsed' in globals() and 'latencies' in globals():
    # Predicted action per frame; '?' marks frames without an 'action' key.
    actions = [p.get('action','?') for p in parsed]
    # Accuracy is only computed when ground-truth labels were provided.
    acc = action_accuracy(parsed, GROUND_TRUTH) if GROUND_TRUTH else None
    # Fraction of frames whose parsed output contains an 'action' key.
    validity = sum(1 for p in parsed if 'action' in p)/len(parsed) if parsed else 0
    summary = {
        'actions': actions,
        'accuracy': acc,
        'json_validity': validity,
        'avg_latency_s': sum(latencies)/len(latencies) if latencies else None
    }
    summary_json = json.dumps(summary, indent=2)
    save_text(os.path.join(OUTPUT_DIR,'experiment1_summary.json'), summary_json)
    # A bare expression inside an if-body is not rendered by the notebook, so print
    # the summary explicitly to show what was saved.
    print(summary_json)
else:
    print('Run generation cells first.')

: 