In [1]:
# Enhanced configuration for multiview fusion + output saving
from utils import ensure_dir

# Generation settings consumed by the cells below.
MODEL_KEY = 'qwen25'     # first argument passed to generate_action
MAX_NEW_TOKENS = 128     # forwarded to generate_action as max_new_tokens
EXTRA_PROMPT = ''        # argument passed to build_multiview_prompt

# Directory that receives the saved experiment summary.
OUTPUT_DIR = 'outputs'

# Last expression of the cell, so its value is what the notebook displays.
# NOTE(review): presumably creates the directory if missing — confirm in utils.ensure_dir.
ensure_dir(OUTPUT_DIR)


'outputs'

# Experiment 2: Multiview Fusion (Camera + Map)

Compare the model's action suggestions when it receives both the front-camera image and the bird's-eye map image for each frame. Then measure the change (delta) in suggested actions relative to the single-view baseline.

In [2]:
# Settings for this experiment; the first three repeat the values from the
# configuration cell at the top of the notebook.
MODEL_KEY = 'qwen25'
MAX_NEW_TOKENS = 128
EXTRA_PROMPT = ''

# Example labels only; adjust or load real ones.
# NOTE(review): presumably one expected action per frame pair — confirm ordering against list_frame_pairs.
GROUND_TRUTH = [
    'LEFT', 'STOP', 'SLOW',
    'LEFT', 'STOP', 'SLOW',
]

In [3]:
# Image loading plus the project's multiview helpers used by the cells below.
from PIL import Image
from utils import (
    build_multiview_prompt,
    generate_action,
    list_frame_pairs,
    parse_action_json,
)

# Each entry of `pairs` is unpacked later as (camera path, map path).
pairs = list_frame_pairs(folder_name='example_edge_samples')

# Show how many pairs were found.
len(pairs)

6

In [4]:
# Query the model once per (camera, map) pair, keeping both the raw and the parsed response.
prompt = build_multiview_prompt(EXTRA_PROMPT)
outputs = []  # raw return values of generate_action, one per pair
parsed = []   # parse_action_json results; {} whenever the parser returns a falsy value
for cam_path, map_path in pairs:
    # Image.open leaves the underlying file open until the image is closed.
    # convert() returns a separate in-memory image, so the file handles can be
    # released as soon as the RGB copies exist instead of lingering until GC.
    with Image.open(cam_path) as cam_file, Image.open(map_path) as map_file:
        cam = cam_file.convert('RGB')
        map_img = map_file.convert('RGB')
    out = generate_action(MODEL_KEY, [cam, map_img], prompt, max_new_tokens=MAX_NEW_TOKENS)
    outputs.append(out)
    parsed.append(parse_action_json(out) or {})
# Last expression of the cell: display the parsed responses.
parsed



Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

[{'action': 'LEFT',
  'rationale': 'The road curves to the left, indicating that turning left is the correct direction.'},
 {'action': 'FORWARD',
  'rationale': 'The robot is on a straight path with no obstacles or turns ahead.'},
 {'action': 'LEFT',
  'rationale': 'The robot is on a curved road, and the yellow dashed lines indicate a left turn. The robot should follow the curve.'},
 {'action': 'FORWARD',
  'rationale': 'The road is clear ahead, and there are no obstacles or traffic signals indicating a need to change direction.'},
 {'action': 'STOP',
  'rationale': 'The road is blocked by traffic cones, indicating that it is not safe to proceed forward.'},
 {'action': 'FORWARD',
  'rationale': 'The rubber duck is in the center of the road, indicating no immediate obstacles or need to change direction.'}]

In [5]:
# Pull the 'action' field out of every parsed response; '?' stands in when the key is absent.
actions = [
    response.get('action', '?')
    for response in parsed
]
actions

['LEFT', 'FORWARD', 'LEFT', 'FORWARD', 'STOP', 'FORWARD']

In [None]:
# Persist actions for multiview fusion
import json
import os

from utils import save_text, parse_action_json

# Guard against running this cell before the generation cell has defined `parsed`.
if 'parsed' in globals():
    # Rebuild the action list from `parsed` so this cell does not rely on the
    # separate cell that defines `actions`.
    actions = [p.get('action', '?') for p in parsed]
    summary = {'actions': actions}
    summary_json = json.dumps(summary, indent=2)
    save_text(os.path.join(OUTPUT_DIR, 'experiment2_multiview_summary.json'), summary_json)
    # A bare expression nested inside an `if` block is never auto-displayed by the
    # notebook (only the cell's last top-level expression is), so echo the saved
    # content explicitly.
    print(summary_json)
else:
    print('Run generation cells first.')

: 