# Qwen2-VL Playground

Mount Google Drive and clone the repository if needed.

In [None]:
# Mount Google Drive at /content/drive so that later cells can read and write
# under /content/drive/MyDrive. `google.colab` is only importable inside Colab.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Clone the project repository once, then make it the working directory so the
# relative paths used by later cells (requirements.txt, scripts/...) resolve.
import os
REPO_DIR = '/content/VLM_Studies'
# Skip the clone when the target path already exists (e.g. on a cell re-run).
if not os.path.exists(REPO_DIR):
    # `!` hands the line to the shell; IPython substitutes {REPO_DIR} first.
    !git clone https://github.com/PhilSaad333/VLM_Studies.git {REPO_DIR}
os.chdir(REPO_DIR)
print(f'Working directory: {os.getcwd()}')

In [None]:
!pip install -q -r requirements.txt

## Sample NLVR2 examples

In [None]:
# Run the gallery script on the validation split with --streaming enabled,
# passing 512 for --streaming-take and 3 for --num-samples. --save-dir points
# at a folder on the mounted Drive (presumably where the gallery is written —
# confirm against scripts/sample_gallery.py).
!python scripts/sample_gallery.py --split validation --streaming --streaming-take 512 --num-samples 3 --save-dir /content/drive/MyDrive/VLM_Studies_Files/analysis/sample_gallery

## Manual prompt run

In [None]:
!python scripts/model_playground.py \n    --model-name Qwen/Qwen2-VL-2B-Instruct \n    --uid dev-0-0 \n    --split validation \n    --streaming \n    --streaming-take 512 \n    --question 'Are both pictures showing boats on water? Answer True or False.' \n    --temperature 0.0 \n    --max-new-tokens 16 \n    --log-jsonl /content/drive/MyDrive/VLM_Studies_Files/analysis/playground_runs.jsonl

In [None]:
# Load the NLVR2 validation split and the UID cache used by the helper
# functions defined further down in the notebook.
from pathlib import Path
import sys

# Root folder on the mounted Drive; the dataset cache and the analysis files
# below are both placed underneath it.
DATA_ROOT = Path('/content/drive/MyDrive/VLM_Studies_Files')
# Put the current working directory first on sys.path before importing the
# project packages below (assumes the cwd is the repo checkout — confirm the
# clone cell has run).
sys.path.insert(0, str(Path.cwd()))

from vlm_datasets.nlvr2 import NLVR2DataConfig, load_nlvr2, find_sample_by_uid
from utils.uid_cache import UIDCache, cycle_uids

# Make sure the dataset cache directory exists before handing it to the loader.
cache_dir = DATA_ROOT / 'datasets' / 'nlvr2_cache'
cache_dir.mkdir(parents=True, exist_ok=True)
# streaming=False here, unlike the script invocations above; len() is taken on
# the result a few lines below.
dataset_cfg = NLVR2DataConfig(split='validation', streaming=False, cache_dir=str(cache_dir))
full_dataset = load_nlvr2(dataset_cfg)

# UIDCache is constructed from a JSON path (presumably it loads any UIDs saved
# there — confirm against utils/uid_cache.py).
UID_CACHE_PATH = DATA_ROOT / 'analysis' / 'uid_cache.json'
uid_cache = UIDCache(UID_CACHE_PATH)
print('Loaded validation split with', len(full_dataset), 'examples')
print('Cached UIDs:', len(uid_cache))


# Last expression of the cell: shows the cache's `uids` attribute as output.
uid_cache.uids

In [None]:
# Re-create the UID cache object from the same JSON path and report its size.
# Relies on DATA_ROOT, which is defined in the dataset-loading cell, so that
# cell must have been run first.
# NOTE(review): presumably this re-reads the file from Drive — confirm against
# utils/uid_cache.py.
from utils.uid_cache import UIDCache, cycle_uids

UID_CACHE_PATH = DATA_ROOT / 'analysis' / 'uid_cache.json'
uid_cache = UIDCache(UID_CACHE_PATH)
print(f'Cached UIDs: {len(uid_cache)}')

In [None]:
from IPython.display import display

def display_sample(sample):
    """Print a sample's UID, label and sentence, then render its images.

    Args:
        sample: Mapping that provides the keys 'uid', 'label', 'sentence'
            and 'images'; 'images' must be iterable.

    Returns:
        None. Output goes to stdout and to the notebook display.
    """
    header = f"UID: {sample['uid']} | Label: {sample['label']}"
    print(header)
    print(sample['sentence'])
    # Each image is passed to display() individually, in iteration order.
    for image in sample['images']:
        display(image)

def cache_uids(uids):
    """Add one UID or several UIDs to the notebook-level ``uid_cache``.

    Args:
        uids: Either a single UID string or an iterable of UIDs. A lone
            string is wrapped in a list so it is added as one entry rather
            than being iterated character by character.

    Returns:
        None. Prints the cache size after the call (the total, not the
        number of UIDs just added).
    """
    batch = [uids] if isinstance(uids, str) else uids
    uid_cache.extend(batch)
    print(f'Cached {len(uid_cache)} total UIDs')

def pop_cached_sample(uid=None):
    """Display the first cached sample produced by ``cycle_uids``.

    Only the first UID yielded by ``cycle_uids(uid_cache, start=uid)`` is
    used: its sample is looked up in ``full_dataset`` and shown with
    ``display_sample``. This function itself does not remove anything from
    the cache.

    Args:
        uid: Forwarded to ``cycle_uids`` as ``start``; defaults to None.

    Returns:
        The UID that was displayed, or None (after printing 'Cache empty')
        when ``cycle_uids`` yields nothing.
    """
    nothing = object()  # distinguishes "no UIDs" from any real UID value
    first_uid = next(iter(cycle_uids(uid_cache, start=uid)), nothing)
    if first_uid is nothing:
        print('Cache empty')
        return None
    display_sample(find_sample_by_uid(full_dataset, first_uid, limit=None))
    return first_uid
