In [71]:
import os, sys
from pathlib import Path
import json

import pandas as pd

# Project root: the notebook is expected to be launched from a sub-folder
# of the repository, so the parent of the current directory is used.
WORK_DIR = Path.cwd().parent

# Make the project's `src` package importable. Guarded so that re-running
# this cell does not keep appending duplicate entries to sys.path.
if str(WORK_DIR) not in sys.path:
    sys.path.append(str(WORK_DIR))

from src import (
    main,
    graph_gen,
    ollama_manager,
    prompt_formatters,
    batch_processor
)
from src.datasets import STARDataset


In [8]:
# Input files, all resolved relative to the project root.
# STAR validation annotations.
STAR_VAL_PATH = WORK_DIR.joinpath("data/datasets/STAR/STAR_annotations/STAR_val.json")
# Model-generated scene graphs, one JSON record per line.
GEN_GRAPH_PATH = WORK_DIR.joinpath("outputs/gen_stsg_gemma3:12b-it-qat_20250522_14:30_99.jsonl")
# Verbalized ground-truth scene graphs for the validation split.
GT_GRAPH_PATH = WORK_DIR.joinpath("data/datasets/STAR_verbalized_stsg_val.json")


In [3]:
# Load the graph-similarity user prompt template and wrap it in a formatter
# that fills in the generated and the ground-truth graph of each sample.
usr_prompt_path = WORK_DIR / "data/prompts/graph-sim/usr_prompt.txt"
user_promp_format = main._load_prompt_fromfile(usr_prompt_path)
pformatter = prompt_formatters.PromptFormatter(
    user_promp_format,
    fields=['gen-stsg', 'gt-stsg'],
)


In [5]:
# Smoke test of the template: dummy graphs make it easy to spot where each
# field lands in the rendered prompt.
sample = {
    'gt-stsg': 'BBBBBBBBBBBBBBBB',
    'gen-stsg': 'AAAAAAAAAAAAAaa',
}
rendered_sample = pformatter.format(sample)
print(rendered_sample)


You are tasked with comparing generated Spatio-Temporal Scene Graphs (STSGs) against ground truth representations to determine their correctness.

CONSIDER A GENERATED STSG CORRECT IF:
- All major entities from the ground truth are present (allowing for hyponyms/synonyms)
- Spatial and temporal relationships are accurately represented
- Minor descriptive additions don't contradict the ground truth

CONSIDER A GENERATED STSG INCORRECT IF ANY OF THESE ERRORS OCCUR:
- Missing Critical Entity: Major entities from ground truth are absent in frames where they should appear
- Incorrect Spatial Relationships: Wrong predicates describing spatial connections (e.g., "on" vs "under", "left_of" vs "right_of")
- Incorrect Temporal Relationships: Wrong predicates describing temporal sequences (e.g., "before" vs "after", "during" vs "outside")
- Temporal Misalignment: Entities or relationships appearing in wrong time frames or durations
- Contradictory Predicates: Relationships that directly contradic

In [13]:
# Load the STAR validation annotations.
# JSON is read explicitly as UTF-8 instead of the platform default, and no
# empty placeholder is assigned first, so a failed load cannot leave a
# misleading empty `star` behind for later cells.
with open(STAR_VAL_PATH, 'r', encoding='utf-8') as f:
    star = json.load(f)

star_df = pd.DataFrame(star)


In [14]:
# Load the generated scene graphs (JSON Lines: one record per line).
# The file is streamed line by line and blank lines (e.g. a trailing newline
# at the end of the file) are skipped instead of crashing json.loads.
with open(GEN_GRAPH_PATH, 'r', encoding='utf-8') as f:
    gen_stsg = [json.loads(line) for line in f if line.strip()]

gen_stsg_df = pd.DataFrame(gen_stsg)
# Show the first record as a sanity check of the schema.
gen_stsg_df.iloc[0]


video_id                                                    6H78U
start                                                        15.6
end                                                          22.7
chat_history    [{'role': 'user', 'content': 'Look carefully a...
stsg            \nFrame 0:\n\nyoung_man ---- standing_on ---- ...
Name: 0, dtype: object

In [16]:
# Load the verbalized ground-truth scene graphs.
# Read explicitly as UTF-8, and without an empty placeholder that would
# survive a failed load and silently feed an empty frame to later cells.
with open(GT_GRAPH_PATH, 'r', encoding='utf-8') as f:
    gt_stsg = json.load(f)

gt_stsg_df = pd.DataFrame(gt_stsg)
# Show the first record as a sanity check of the schema.
gt_stsg_df.iloc[0]


question_id                                    Interaction_T1_13
video_id                                                   6H78U
start                                                       11.1
end                                                         19.6
stsg           Frame 000198\n\tperson --- on_the_side_of --- ...
Name: 0, dtype: object

In [18]:
# Compare the number of generated records with the number of distinct
# (video_id, start, end) segments to see how many segments are repeated.
segment_keys = ['video_id', 'start', 'end']
distinct_segments_shape = gen_stsg_df[segment_keys].value_counts().shape
gen_stsg_df.shape, distinct_segments_shape


((1094, 5), (870,))

In [55]:
# Preview of one generated graph per (video_id, start, end) segment, before
# the chat history is dropped. Computed directly from gen_stsg_df so this
# cell does not rely on a variable that is only assigned further down
# (which fails with NameError on a fresh kernel / Run All).
gen_stsg_df.groupby(['video_id', 'start', 'end']).nth(0).rename(columns={'stsg': 'gen-stsg'})


Unnamed: 0,video_id,start,end,chat_history,gen-stsg
0,6H78U,15.6,22.7,"[{'role': 'user', 'content': 'Look carefully a...",\nFrame 0:\n\nyoung_man ---- standing_on ---- ...
1,RNLTR,11.6,16.4,"[{'role': 'user', 'content': 'Look carefully a...",\nFrame 0:\n\nwoman ---- standing_in_front_of ...
2,VNQTH,2.7,9.2,"[{'role': 'user', 'content': 'Look carefully a...",\nFrame 0:\n\n\nwoman ---- wearing ---- white_...
3,Y79PC,16.9,22.6,"[{'role': 'user', 'content': 'Look carefully a...",\nFrame 0:\n\n\nwoman ---- standing_in ---- ro...
4,4GLAP,15.7,21.4,"[{'role': 'user', 'content': 'Look carefully a...",\nFrame 0:\n\nyoung_man ---- holding ---- plas...
...,...,...,...,...,...
1086,FRLW2,25.7,32.0,"[{'role': 'user', 'content': 'Look carefully a...",\nFrame 0:\n\n\nwoman ---- standing_in_front_o...
1088,3W6TL,13.1,20.1,"[{'role': 'user', 'content': 'Look carefully a...",\nFrame 0:\n\nbed ---- against ---- wall\nnigh...
1090,86GSE,10.1,20.0,"[{'role': 'user', 'content': 'Look carefully a...",\nFrame 0:\n\nwoman ---- wearing ---- red_dres...
1091,9J166,18.6,23.0,"[{'role': 'user', 'content': 'Look carefully a...",\nFrame 0:\n\nwoman ---- looking_at ---- windo...


In [57]:
# Keep a single generated graph per (video_id, start, end) segment, expose it
# under the 'gen-stsg' name used by the prompt formatter, and discard the
# chat history, which is not needed for the comparison.
unique_by_vid_gen = (
    gen_stsg_df
    .groupby(['video_id', 'start', 'end'])
    .nth(0)
    .rename(columns={'stsg': 'gen-stsg'})
    .drop(columns=['chat_history'])
)
unique_by_vid_gen


Unnamed: 0,video_id,start,end,gen-stsg
0,6H78U,15.6,22.7,\nFrame 0:\n\nyoung_man ---- standing_on ---- ...
1,RNLTR,11.6,16.4,\nFrame 0:\n\nwoman ---- standing_in_front_of ...
2,VNQTH,2.7,9.2,\nFrame 0:\n\n\nwoman ---- wearing ---- white_...
3,Y79PC,16.9,22.6,\nFrame 0:\n\n\nwoman ---- standing_in ---- ro...
4,4GLAP,15.7,21.4,\nFrame 0:\n\nyoung_man ---- holding ---- plas...
...,...,...,...,...
1086,FRLW2,25.7,32.0,\nFrame 0:\n\n\nwoman ---- standing_in_front_o...
1088,3W6TL,13.1,20.1,\nFrame 0:\n\nbed ---- against ---- wall\nnigh...
1090,86GSE,10.1,20.0,\nFrame 0:\n\nwoman ---- wearing ---- red_dres...
1091,9J166,18.6,23.0,\nFrame 0:\n\nwoman ---- looking_at ---- windo...


In [60]:
# Collapse the ground truth to one row per (video_id, start, end) segment:
# keep the first graph of the group as 'gt-stsg' and gather every question id
# that refers to the segment into the 'q_ids' list.
gt_segment_groups = gt_stsg_df.groupby(['video_id', 'start', 'end'])
unique_by_vid_gt = gt_segment_groups.agg(**{
    'gt-stsg': ('stsg', 'first'),
    'q_ids': ('question_id', list),
}).reset_index()

unique_by_vid_gt


Unnamed: 0,video_id,start,end,gt-stsg,q_ids
0,013SD,0.0,4.30,Frame 000010\n\tperson --- holding --- box\n\t...,"[Interaction_T1_7912, Interaction_T2_6864, Int..."
1,01KML,18.1,26.80,Frame 000569\n\tperson --- standing_on --- flo...,"[Interaction_T1_4892, Interaction_T2_4264, Int..."
2,01ZWG,4.9,11.00,Frame 000080\n\tperson --- in_front_of --- tab...,"[Interaction_T1_2724, Interaction_T2_2495, Int..."
3,01ZWG,4.9,13.50,Frame 000080\n\tperson --- in_front_of --- tab...,"[Sequence_T1_2453, Sequence_T2_1855, Sequence_..."
4,01ZWG,7.2,13.50,Frame 000103\n\tperson --- touching --- table\...,"[Interaction_T1_2725, Interaction_T2_2496]"
...,...,...,...,...,...
3368,ZZ89F,0.0,9.60,Frame 000010\n\ttowel --- near --- shoe\nFrame...,"[Feasibility_T3_81, Feasibility_T4_81, Feasibi..."
3369,ZZ89F,0.0,15.35,Frame 000072\n\tperson --- in_front_of --- ref...,[Prediction_T1_2530]
3370,ZZ89F,7.4,13.50,Frame 000072\n\tperson --- in_front_of --- ref...,[Feasibility_T5_2154]
3371,ZZ89F,13.6,20.60,Frame 000129\n\tperson --- on_the_side_of --- ...,[Interaction_T1_8201]


In [61]:
# Attach the ground-truth graph (and its question ids) to every generated
# graph, matching on the video segment.
merged_df = unique_by_vid_gen.merge(
    unique_by_vid_gt,
    on=['video_id', 'start', 'end'],
    how='left'
)

# The left join leaves NaN in 'gt-stsg' / 'q_ids' for generated segments with
# no ground-truth match. Indexing q_ids[0] on such a row raises TypeError and
# a NaN ground truth would be pasted into the prompt, so those rows are
# reported and dropped before the prompts are built.
has_gt = merged_df['q_ids'].notna()
if not has_gt.all():
    print(f"Skipping {int((~has_gt).sum())} generated segment(s) without a ground-truth STSG")

r_df = merged_df[has_gt].reset_index(drop=True)
# add qid for prompt compatibility
r_df['qid'] = r_df['q_ids'].map(lambda ids: ids[0])
r_df['prompt'] = r_df.apply(lambda row: pformatter.format(row), axis=1)
r_df


Unnamed: 0,video_id,start,end,gen-stsg,gt-stsg,q_ids,qid,prompt
0,6H78U,15.6,22.7,\nFrame 0:\n\nyoung_man ---- standing_on ---- ...,Frame 000286\n\tperson --- holding --- towel\n...,[Interaction_T1_14],Interaction_T1_14,You are tasked with comparing generated Spatio...
1,RNLTR,11.6,16.4,\nFrame 0:\n\nwoman ---- standing_in_front_of ...,Frame 000194\n\tperson --- on_the_side_of --- ...,[Interaction_T1_32],Interaction_T1_32,You are tasked with comparing generated Spatio...
2,VNQTH,2.7,9.2,\nFrame 0:\n\n\nwoman ---- wearing ---- white_...,Frame 000043\n\tperson --- holding --- clothes...,"[Interaction_T1_40, Interaction_T2_31]",Interaction_T1_40,You are tasked with comparing generated Spatio...
3,Y79PC,16.9,22.6,\nFrame 0:\n\n\nwoman ---- standing_in ---- ro...,Frame 000525\n\tperson --- wearing --- clothes...,"[Interaction_T1_43, Interaction_T2_35]",Interaction_T1_43,You are tasked with comparing generated Spatio...
4,4GLAP,15.7,21.4,\nFrame 0:\n\nyoung_man ---- holding ---- plas...,Frame 000100\n\tperson --- holding --- shoe\n\...,"[Interaction_T1_71, Interaction_T4_21, Interac...",Interaction_T1_71,You are tasked with comparing generated Spatio...
...,...,...,...,...,...,...,...,...
865,FRLW2,25.7,32.0,\nFrame 0:\n\n\nwoman ---- standing_in_front_o...,Frame 000811\n\tperson --- standing_on --- flo...,"[Interaction_T1_6052, Interaction_T2_5259]",Interaction_T1_6052,You are tasked with comparing generated Spatio...
866,3W6TL,13.1,20.1,\nFrame 0:\n\nbed ---- against ---- wall\nnigh...,Frame 000401\n\tperson --- on_the_side_of --- ...,[Interaction_T2_5312],Interaction_T2_5312,You are tasked with comparing generated Spatio...
867,86GSE,10.1,20.0,\nFrame 0:\n\nwoman ---- wearing ---- red_dres...,Frame 000310\n\tperson --- beneath --- sofa/co...,"[Interaction_T1_6161, Interaction_T2_5357]",Interaction_T1_6161,You are tasked with comparing generated Spatio...
868,9J166,18.6,23.0,\nFrame 0:\n\nwoman ---- looking_at ---- windo...,Frame 000562\n\tperson --- holding --- book\n\...,[Interaction_T2_5369],Interaction_T2_5369,You are tasked with comparing generated Spatio...


In [63]:
# Render the first fully formatted prompt to check it by eye.
first_prompt = r_df['prompt'].iloc[0]
print(first_prompt)


You are tasked with comparing generated Spatio-Temporal Scene Graphs (STSGs) against ground truth representations to determine their correctness.

CONSIDER A GENERATED STSG CORRECT IF:
- All major entities from the ground truth are present (allowing for hyponyms/synonyms)
- Spatial and temporal relationships are accurately represented
- Minor descriptive additions don't contradict the ground truth

CONSIDER A GENERATED STSG INCORRECT IF ANY OF THESE ERRORS OCCUR:
- Missing Critical Entity: Major entities from ground truth are absent in frames where they should appear
- Incorrect Spatial Relationships: Wrong predicates describing spatial connections (e.g., "on" vs "under", "left_of" vs "right_of")
- Incorrect Temporal Relationships: Wrong predicates describing temporal sequences (e.g., "before" vs "after", "during" vs "outside")
- Temporal Misalignment: Entities or relationships appearing in wrong time frames or durations
- Contradictory Predicates: Relationships that directly contradic

In [65]:
# Turn the frame into a list of per-row dicts, the payload handed to the
# batch processor below; show the first one as a sanity check.
prompts = r_df.to_dict(orient='records')
prompts[0]


{'video_id': '6H78U',
 'start': 15.6,
 'end': 22.7,
 'gen-stsg': '\nFrame 0:\n\nyoung_man ---- standing_on ---- floor\nyoung_man ---- looking_at ---- camera\nyoung_man ---- in_front_of ---- sofa\nsofa ---- against ---- wall\nsofa ---- has ---- pillow\npillow ---- resting_on ---- sofa\nfloor ---- beneath ---- sofa\nfloor ---- beneath ---- young_man\nwall ---- behind ---- sofa\nwindow ---- behind ---- young_man\nwindow ---- adjacent_to ---- wall\ncurtains_or_blinds ---- covering ---- window\nlight_fixture ---- mounted_on ---- wall\nlight_fixture ---- above ---- sofa\n\nFrame 1:\n\nman ---- holding ---- pillow\nman ---- standing_in_front_of ---- sofa\npillow ---- on ---- sofa\nsofa ---- against ---- wall\nair_conditioning_unit ---- mounted_on ---- wall\nelectrical_outlet ---- on ---- wall\nair_conditioning_unit ---- above ---- electrical_outlet\nfloor ---- beneath ---- sofa\nfloor ---- beneath ---- man\nman ---- wearing ---- sandals\nsofa ---- has ---- armrest\nsofa ---- has ---- backrest

In [69]:
# No system prompt is used for this run. To use one, load it with
# graph_gen._load_prompt_fromfile from
# WORK_DIR / "data/prompts/graph_gen/system_prompt.txt" and assign it here.
sys_prompt = None

# Generation options are read from the JSON file at the project root.
model_options = main._load_model_options(WORK_DIR / "ollama_model_options.json")

ollama_params = dict(
    model="gemma3:4b-it-qat",
    system=sys_prompt,
    stream=True,
    options=model_options,
)

# The server address can be overridden through the OLLAMA_URL environment
# variable; otherwise the default host below is used.
url = os.getenv("OLLAMA_URL", "http://lusha_ollama:11435")
client = ollama_manager.OllamaRequestManager(url, ollama_params)


In [70]:
# Pre-load the model: send a throwaway prompt before the batch run.
# The call prints the reply and the generation speed, and the returned reply
# string is shown as the cell output.
client.generate_completion("hello")


Hello there! How can I help you today? 😊 

Do you want to:

*   Ask me a question?
*   Start a conversation?
*   Have me write something (like a story, poem, or code)?
*   Just say hi?

Response at: 53.8 tk/s


'Hello there! How can I help you today? 😊 \n\nDo you want to:\n\n*   Ask me a question?\n*   Start a conversation?\n*   Have me write something (like a story, poem, or code)?\n*   Just say hi?'

In [None]:
import contextlib
import os

OUT_FILE = WORK_DIR / 'outputs/sim_between_frames_gemma3:4b-it-qat_20250603_16:33:99.jsonl'

# Silence everything printed to stdout while the batch runs.
# The context managers close the devnull handle and restore sys.stdout even
# if batch_generate raises; the manual swap used before left the notebook
# without any printed output after a failure and leaked the file handle.
with open(os.devnull, 'w') as devnull, contextlib.redirect_stdout(devnull):
    batch_processor.batch_generate(
        client,
        prompts,
        output_file_path=OUT_FILE
    )
