# Qwen2-VL Playground

Mount Google Drive and clone the repository if needed.

In [None]:
# Mount Google Drive at /content/drive. Later cells read and write under
# /content/drive/MyDrive/VLM_Studies_Files, so this cell has to run first.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os
# Local checkout location of the repository.
REPO_DIR = '/content/VLM_Studies'
# Clone only when the directory is missing, so re-running this cell does not
# attempt a second clone into an existing checkout.
if not os.path.exists(REPO_DIR):
    !git clone https://github.com/PhilSaad333/VLM_Studies.git {REPO_DIR}
# Later cells use paths relative to the repository root
# (requirements.txt, scripts/sample_gallery.py).
os.chdir(REPO_DIR)
print(f'Working directory: {os.getcwd()}')

In [None]:
# Install the dependencies listed in requirements.txt, resolved relative to the
# current working directory; -q keeps pip's output quiet.
!pip install -q -r requirements.txt

## Sample NLVR2 examples

In [None]:
# Run the sample-gallery script on the validation split and write its output
# to the Drive folder given by --save-dir.
# NOTE(review): presumably this draws 3 examples from the first 512 streamed
# records -- confirm the flag semantics against scripts/sample_gallery.py.
!python scripts/sample_gallery.py --split validation --streaming --streaming-take 512 --num-samples 3 --save-dir /content/drive/MyDrive/VLM_Studies_Files/analysis/sample_gallery

## Manual prompt run
Use `run_on_uid` or `generate_from_sample` to work with the cached dataset without rerunning CLI scripts.

In [None]:
from pathlib import Path
import sys

# Root folder on the mounted Drive for the dataset cache and analysis outputs.
DATA_ROOT = Path('/content/drive/MyDrive/VLM_Studies_Files')
# Put the current working directory first on sys.path so the project packages
# imported below (vlm_datasets, utils) resolve; this assumes the working
# directory is the repository root.
sys.path.insert(0, str(Path.cwd()))

from vlm_datasets.nlvr2 import NLVR2DataConfig, load_nlvr2, find_sample_by_uid
from utils.prompts import build_conversation
from utils.uid_cache import UIDCache, cycle_uids

# Dataset cache directory on Drive; created on first use.
cache_dir = DATA_ROOT / 'datasets' / 'nlvr2_cache'
cache_dir.mkdir(parents=True, exist_ok=True)
# Load the validation split without streaming; later cells look samples up in
# this dataset by UID and len() is taken on it below.
dataset_cfg = NLVR2DataConfig(split='validation', streaming=False, cache_dir=str(cache_dir))
full_dataset = load_nlvr2(dataset_cfg)

# UID cache constructed from a JSON path on Drive.
# NOTE(review): presumably UIDCache loads from and persists to this file --
# confirm in utils/uid_cache.py.
UID_CACHE_PATH = DATA_ROOT / 'analysis' / 'uid_cache.json'
uid_cache = UIDCache(UID_CACHE_PATH)
print('Loaded validation split with', len(full_dataset), 'examples')
print('Cached UIDs:', len(uid_cache))


In [None]:
from transformers import AutoModelForVision2Seq, AutoProcessor
import torch

# Hugging Face Hub identifier of the checkpoint used by the cells below.
MODEL_NAME = 'Qwen/Qwen2-VL-2B-Instruct'
# The processor is used later both to build model inputs from a chat
# conversation and (via processor.tokenizer) to decode generated tokens.
# NOTE(review): trust_remote_code=True permits running code shipped with the
# checkpoint repository -- confirm it is actually required for this model.
processor = AutoProcessor.from_pretrained(MODEL_NAME, trust_remote_code=True)
# device_map='auto' leaves device placement to the library; later code moves
# inputs to model.device before generating.
model = AutoModelForVision2Seq.from_pretrained(
    MODEL_NAME, trust_remote_code=True, device_map='auto'
)
# Switch to evaluation mode; this notebook only runs inference.
model.eval()
print('Model loaded')


In [None]:
from pathlib import Path
from IPython.display import display
import json

def display_sample(sample):
    """Print a sample's UID, label and sentence, then render each of its images.

    ``sample`` is a mapping with the keys ``'uid'``, ``'label'``,
    ``'sentence'`` and ``'images'`` (an iterable of displayable objects).
    """
    uid, label = sample['uid'], sample['label']
    print(f"UID: {uid} | Label: {label}")
    print(sample['sentence'])
    for picture in sample['images']:
        display(picture)

def generate_from_sample(sample, question, *, system=None, temperature=0.0, top_p=1.0, max_new_tokens=64, log_path=None):
    """Ask the loaded model a question about one sample's images.

    Args:
        sample: Mapping providing at least ``'uid'`` and ``'images'``.
        question: Prompt text passed to ``build_conversation`` with the images.
        system: Optional system prompt forwarded to ``build_conversation``.
        temperature: Sampling temperature. Values ``<= 0`` disable sampling
            (``do_sample=False``); positive values enable it.
        top_p: Nucleus-sampling threshold; only forwarded when sampling.
        max_new_tokens: Upper bound on the number of generated tokens.
        log_path: Optional path of a JSONL file. When given, the returned
            record is appended to it as one JSON line (parent directories
            are created as needed).

    Returns:
        dict with the keys ``uid``, ``question``, ``response`` (decoded,
        stripped text of the newly generated tokens), ``temperature``,
        ``top_p``, ``max_new_tokens`` and ``system``.
    """
    messages = build_conversation(sample['images'], question, system_prompt=system)
    inputs = processor.apply_chat_template(
        messages, add_generation_prompt=True, tokenize=True, return_dict=True, return_tensors='pt'
    )
    inputs = {k: v.to(model.device) for k, v in inputs.items()}

    sampling = temperature > 0
    gen_kwargs = {
        'do_sample': sampling,
        'max_new_tokens': max_new_tokens,
        'pad_token_id': processor.tokenizer.eos_token_id,
    }
    if sampling:
        # temperature/top_p only have an effect in sampling mode; passing them
        # alongside do_sample=False is ignored and merely triggers
        # generation-config warnings (temperature=0.0 is not a valid
        # sampling temperature anyway).
        # NOTE(review): defaults stored in the checkpoint's own generation
        # config (e.g. top_k) still apply here -- confirm they suit sampling.
        gen_kwargs['temperature'] = temperature
        gen_kwargs['top_p'] = top_p

    with torch.no_grad():
        output = model.generate(**inputs, **gen_kwargs)

    # The returned sequence starts with the whole prompt, padding included, so
    # the new tokens begin at the prompt's sequence length. The number of
    # attended positions only matches that length when there is no padding.
    prompt_len = inputs['input_ids'].shape[1]
    generated = output[0, prompt_len:]
    response = processor.tokenizer.decode(generated, skip_special_tokens=True).strip()

    record = {
        'uid': sample['uid'],
        'question': question,
        'response': response,
        'temperature': temperature,
        'top_p': top_p,
        'max_new_tokens': max_new_tokens,
        'system': system,
    }
    if log_path is not None:
        log_path = Path(log_path)
        log_path.parent.mkdir(parents=True, exist_ok=True)
        # Append mode: every call adds one line, giving a JSONL run log.
        with log_path.open('a', encoding='utf-8') as f:
            json.dump(record, f)
            f.write('\n')
    return record

def cache_uids(uids):
    """Add UIDs to the notebook-level ``uid_cache`` and print its new size.

    ``uids`` may be a single UID string or an iterable of UIDs; a bare string
    is wrapped in a list so it is stored as one entry.
    """
    batch = [uids] if isinstance(uids, str) else uids
    uid_cache.extend(batch)
    print('Cached UIDs:', len(uid_cache))

def pop_cached_sample(uid=None):
    """Display the first sample yielded by ``cycle_uids`` and return its UID.

    ``uid`` is forwarded to ``cycle_uids`` as ``start``. When nothing is
    yielded, ``'Cache empty'`` is printed and ``None`` is returned. This
    function itself does not remove anything from ``uid_cache``.
    """
    nothing = object()
    first_uid = next(iter(cycle_uids(uid_cache, start=uid)), nothing)
    if first_uid is nothing:
        print('Cache empty')
        return None
    display_sample(find_sample_by_uid(full_dataset, first_uid, limit=None))
    return first_uid

def run_on_uid(uid, question, **kwargs):
    """Look up a sample by UID, show it, query the model and print the answer.

    Extra keyword arguments are forwarded unchanged to
    ``generate_from_sample``, whose record is returned.
    """
    chosen = find_sample_by_uid(full_dataset, uid, limit=None)
    display_sample(chosen)
    outcome = generate_from_sample(chosen, question, **kwargs)
    answer_text = outcome['response']
    print('\nResponse:\n' + answer_text)
    return outcome


In [None]:
# Example run on a single UID. temperature=0.0 makes generate_from_sample
# disable sampling, and the resulting record is appended as one JSON line to
# playground_runs.jsonl under DATA_ROOT / 'analysis'.
example_result = run_on_uid(
    uid='dev-850-0-0',
    question='Do the two images depict the same walkway? Answer True or False.',
    temperature=0.0,
    top_p=1.0,
    max_new_tokens=32,
    log_path=DATA_ROOT / 'analysis' / 'playground_runs.jsonl'
)
