In [30]:
import os, sys
from pathlib import Path
import json

import importlib

# Parent of the current working directory. The project-relative paths used in
# this notebook and the `src` package import are resolved against it.
WORK_DIR = Path.cwd().parent

# Make `src` importable. The membership check keeps the cell idempotent:
# re-running it no longer appends a duplicate entry to sys.path each time.
if str(WORK_DIR) not in sys.path:
    sys.path.append(str(WORK_DIR))

from src import main, datasets, prompt_formatters


In [2]:
# Input files, both located under WORK_DIR.
# QA samples (reported as "QA File" in the dataset statistics printout).
CVRR_VAL = WORK_DIR.joinpath("notebooks/cvrr_val_updated.json")
# Generated scene graphs (reported as "STSG File" when passed to the dataset).
CVRR_STSG = WORK_DIR.joinpath("data/datasets/CVRR/generated_stsg_cvrr.json")


In [25]:
# Re-execute the already-imported `datasets` module so that edits made to it
# on disk take effect without restarting the kernel. Note that only this
# module is reloaded; `prompt_formatters` and `main` are not.
importlib.reload(datasets)

# Inline prompt template with {question} and {stsg} placeholders
# (presumably filled per sample by the formatter -- confirm in prompt_formatters).
prompt_formatter = prompt_formatters.OpenEndedPrompt(
    "Here's the question: {question}\nGraph:\n{stsg}"
)

# QA-only dataset: no STSG file is passed here.
dataset = datasets.CVRRDataset(
    CVRR_VAL,
    prompt_formatter,
)



Dataset Statistics:
QA File: cvrr_val_updated.json
Number of QA samples: 2400
QA sample keys: dimension_name, subset, question_id, question, video_id, answer



In [20]:
# Inspect the dataset's `q_id_key` attribute -- presumably the name of the
# sample field used as the question identifier (confirm in src/datasets).
dataset.q_id_key


'question_id'

In [21]:
# Spot-check a single sample: besides the raw QA fields, the returned dict
# also carries the `qid` and `prompt` entries.
dataset[1]


{'dimension_name': 'Continuity and Object Instance Count',
 'subset': 'continuity_and_object_instance_count',
 'question_id': 'b27163c5-d341-475b-8517-54809872081a',
 'question': 'Does the pink cube and shiny pink cube undergo any covering or stacking interactions with other objects?',
 'video_id': 'continuity_and_object_instance_count_183',
 'answer': 'No, the pink cube and shiny pink cube remain uncovered throughout the simulation and do not participate in any stacking interactions, performing only independent motions.',
 'qid': 'b27163c5-d341-475b-8517-54809872081a',
 'prompt': "Here's the question: Does the pink cube and shiny pink cube undergo any covering or stacking interactions with other objects?"}

In [22]:
# Show the first sample's id and its formatted prompt, one per line.
# print() is used (not a bare expression) so newlines in the prompt render.
print(
    f"question_id: {dataset[0]['qid']}",
    dataset[0]['prompt'],
    sep="\n",
)


question_id: 2a49d339-11d1-4c70-b6df-8f3f1e4bbcc0
Here's the question: How many times is the golden hollow sphere gets covered by other objects?


## STSG merge

In [32]:
# Prompt template is read from a file this time instead of being inlined.
user_prompt_path = WORK_DIR.joinpath("data/prompts/open-qa/CVRR/user_prompt.txt")
prompt_formatter = prompt_formatters.OpenEndedPrompt(
    main._load_prompt_fromfile(user_prompt_path)
)

# Passing CVRR_STSG as the third argument attaches scene graphs to the
# samples; the statistics printout then also reports the STSG file.
dataset = datasets.CVRRDataset(CVRR_VAL, prompt_formatter, CVRR_STSG)



Dataset Statistics:
QA File: cvrr_val_updated.json
Number of QA samples: 2400
QA sample keys: dimension_name, subset, question_id, question, video_id, answer

STSG File: generated_stsg_cvrr.json
Number of unique video IDs with STSG: 193



In [33]:
# Spot-check the first sample now that the STSG file is supplied: the
# returned dict includes an `stsg` entry holding the scene-graph text.
dataset[0]


{'dimension_name': 'Continuity and Object Instance Count',
 'subset': 'continuity_and_object_instance_count',
 'question_id': '2a49d339-11d1-4c70-b6df-8f3f1e4bbcc0',
 'question': 'How many times is the golden hollow sphere gets covered by other objects?',
 'video_id': 'continuity_and_object_instance_count_183',
 'answer': 'The golden hollow sphere is covered two times by other objects.',
 'stsg': '\nFrame 0:\n\n\npurple_cube ---- above ---- yellow_sphere\npurple_cube ---- adjacent_to ---- green_cone\npurple_cube ---- adjacent_to ---- red_cone\ngreen_cone ---- to_the_left_of ---- purple_cube\nred_cone ---- to_the_right_of ---- purple_cube\nyellow_sphere ---- below ---- purple_cube\ngreen_cone ---- positioned_near ---- purple_cube\nred_cone ---- positioned_near ---- purple_cube\nyellow_sphere ---- directly_under ---- purple_cube\npurple_cube ---- supporting ---- yellow_sphere\n\nFrame 1:\n\n\nred_sphere ---- above ---- green_cone\ngreen_cone ---- above ---- purple_cube\npurple_cube ---- 

In [34]:
# Print (rather than display the repr) so the newlines inside the formatted
# prompt are rendered and the question / scene-graph sections are readable.
print(dataset[0]['prompt'])


# Video Question-Answering User Prompt Template

## Question
How many times is the golden hollow sphere gets covered by other objects?

## Spatio-Temporal Scene Graph Representation

Frame 0:


purple_cube ---- above ---- yellow_sphere
purple_cube ---- adjacent_to ---- green_cone
purple_cube ---- adjacent_to ---- red_cone
green_cone ---- to_the_left_of ---- purple_cube
red_cone ---- to_the_right_of ---- purple_cube
yellow_sphere ---- below ---- purple_cube
green_cone ---- positioned_near ---- purple_cube
red_cone ---- positioned_near ---- purple_cube
yellow_sphere ---- directly_under ---- purple_cube
purple_cube ---- supporting ---- yellow_sphere

Frame 1:


red_sphere ---- above ---- green_cone
green_cone ---- above ---- purple_cube
purple_cube ---- on ---- surface
red_sphere ---- above ---- green_cone
green_cone ---- looking_at ---- purple_cube
red_sphere ---- looking_at ---- green_cone
purple_cube ---- on_top_of ---- surface
green_cone ---- attached_to ---- red_sphere
surface ---- s

## Dataset for LLM-as-Judge

In [35]:
# Small fixture files for the LLM-as-judge dataset.
# NOTE(review): this rebinds CVRR_VAL, which earlier in the notebook points at
# notebooks/cvrr_val_updated.json -- cells above behave differently if re-run
# after this one.
CVRR_VAL = WORK_DIR.joinpath("test/test_files/llm-judge/cvrr_qa.json")
CVRR_RESP = WORK_DIR.joinpath("test/test_files/llm-judge/cvrr_response.json")

# Judge prompt template file.
pformat_fpath = WORK_DIR.joinpath("test/test_files/llm-judge/llm_as_judge_test_prompt.txt")


In [36]:
# Judge-style prompt formatter built from the template file on disk.
prompt_formatter = prompt_formatters.LlmAsJudgePrompt(
    main._load_prompt_fromfile(pformat_fpath)
)

# Base QA dataset first, then wrap it together with the responses file.
dataset = datasets.CVRRDataset(CVRR_VAL, prompt_formatter)

# NOTE(review): `judge_datset` is a misspelling of "dataset"; the name is kept
# because a later cell indexes it under this spelling.
judge_datset = datasets.JudgeDataset(
    dataset,
    CVRR_RESP,
    prompt_formatter,
)



Dataset Statistics:
QA File: cvrr_qa.json
Number of QA samples: 1
QA sample keys: question_id, question, video_id, answer



In [37]:
# Spot-check one judge sample: the returned dict carries `gt_answer` and
# `response` alongside the QA fields, plus the formatted judge `prompt`.
judge_datset[0]


{'question_id': 1,
 'question': 'Who are you',
 'video_id': 'dev_null',
 'answer': 'No one',
 'gt_answer': 'No one',
 'response': 'me?',
 'qid': 1,
 'prompt': 'Q: Who are you\nA: No one\nP: me?\n\nJudge instructions...'}