In [14]:
import os, sys
from pathlib import Path
import json

import numpy as np
import pandas as pd

WORK_DIR = Path.cwd().parent

sys.path.append(str(WORK_DIR))

from src import graph_gen
from src import ollama_manager


In [7]:
CVRR_DIR = Path("/multiverse/datasets/shared/cvrr_challange25/val")
VIDEO_DIR = WORK_DIR / "data/datasets/action-genome/Charades_v1_480"
video_ids = [{'video_id': "0A8CF"}]
video_path = VIDEO_DIR / "0A8CF.mp4"


In [12]:
stsg_star = []
with open(f"{WORK_DIR}/data/datasets/STAR_QA_and_stsg_val.json") as f:
    stsg_star = json.load(f)


star_data = []
with open(f"{WORK_DIR}/data/datasets/STAR/STAR_annotations/STAR_val.json", 'r') as f:
    star_data = json.load(f)


In [13]:
same_order = True
for i, j in zip(stsg_star, star_data):
    test = i['question_id'] == j['question_id']
    same_order = same_order and test

same_order


True

In [10]:
def get_vocab_map(label_dir):
    vocab_map = dict()

    with open(label_dir) as in_file:
        for line in in_file.readlines():
            mapping = line.strip('\n')
            key, val = mapping.split(' ')
            vocab_map[key] = val
    
    return vocab_map

obj_vocab = get_vocab_map(WORK_DIR / 'data/datasets/STAR/STAR_annotations/class_maps/object_classes.txt')
rel_vocab = get_vocab_map(WORK_DIR / 'data/datasets/STAR/STAR_annotations/class_maps/relationship_classes.txt')

def generate_stsg(q_data):
    frame_ids = sorted(q_data['situations'].keys())

    stsg = []
    for f in frame_ids:
        frame_sg = []
        rels = q_data['situations'][f]['rel_labels']
        
        frame_sg.append(f"Frame {f}\n")
        for rel_pair, rel in zip(q_data['situations'][f]['rel_pairs'], rels):
            obj1, obj2 = rel_pair
            frame_sg.append(f'\t{obj_vocab[obj1]} --- {rel_vocab[rel]} --- {obj_vocab[obj2]}\n')
        
        frame_sg = ''.join(frame_sg)
        stsg.append(frame_sg)
    
    return stsg


In [15]:
print(''.join(generate_stsg(star_data[0])))


Frame 000198
	person --- on_the_side_of --- clothes
Frame 000202
	person --- in_front_of --- clothes
Frame 000205
	person --- in_front_of --- clothes
	person --- in_front_of --- blanket
Frame 000206
	person --- in_front_of --- towel
	person --- in_front_of --- clothes
	person --- in_front_of --- blanket
	person --- on_the_side_of --- blanket
Frame 000212
	person --- on_the_side_of --- towel
	person --- in_front_of --- towel
	person --- in_front_of --- clothes
	person --- on_the_side_of --- blanket
Frame 000217
	person --- in_front_of --- towel
	person --- on_the_side_of --- clothes
	person --- in_front_of --- blanket
	person --- on_the_side_of --- blanket
Frame 000218
	person --- in_front_of --- clothes
	person --- in_front_of --- blanket
	person --- on_the_side_of --- blanket
Frame 000221
	person --- in_front_of --- towel
	person --- in_front_of --- clothes
	person --- in_front_of --- blanket
Frame 000223
	person --- in_front_of --- towel
	person --- in_front_of --- clothes
	person --

In [19]:
new_stsg = []
for el in star_data:
    entry = {
        'question_id': el['question_id'],
        'video_id': el['video_id'],
        'start': el['start'],
        'end': el['end'],
        'stsg': ''.join(generate_stsg(el)),

    }
    new_stsg.append(entry)


In [20]:
print(new_stsg[0])


{'question_id': 'Interaction_T1_13', 'video_id': '6H78U', 'start': 11.1, 'end': 19.6, 'stsg': 'Frame 000198\n\tperson --- on_the_side_of --- clothes\nFrame 000202\n\tperson --- in_front_of --- clothes\nFrame 000205\n\tperson --- in_front_of --- clothes\n\tperson --- in_front_of --- blanket\nFrame 000206\n\tperson --- in_front_of --- towel\n\tperson --- in_front_of --- clothes\n\tperson --- in_front_of --- blanket\n\tperson --- on_the_side_of --- blanket\nFrame 000212\n\tperson --- on_the_side_of --- towel\n\tperson --- in_front_of --- towel\n\tperson --- in_front_of --- clothes\n\tperson --- on_the_side_of --- blanket\nFrame 000217\n\tperson --- in_front_of --- towel\n\tperson --- on_the_side_of --- clothes\n\tperson --- in_front_of --- blanket\n\tperson --- on_the_side_of --- blanket\nFrame 000218\n\tperson --- in_front_of --- clothes\n\tperson --- in_front_of --- blanket\n\tperson --- on_the_side_of --- blanket\nFrame 000221\n\tperson --- in_front_of --- towel\n\tperson --- in_front_

In [21]:
# with  open(f"{WORK_DIR}/data/datasets/STAR_verbalized_stsg_val.json", 'w') as f:
#     json.dump(new_stsg, f, indent=4)
# 


# Small Sample

In [98]:
STAR_VAL_FILEPATH = WORK_DIR / "data/datasets/STAR/STAR_annotations/STAR_train.json"


In [99]:
with open(STAR_VAL_FILEPATH, 'r') as f:
    star_df = pd.DataFrame(json.load(f))


In [100]:
inter_pos = np.where(star_df['question_id'].str.startswith('Int'))[0]
seq_pos   = np.where(star_df['question_id'].str.startswith('Seq'))[0]
feas_pos  = np.where(star_df['question_id'].str.startswith('Fea'))[0]
pred_pos  = np.where(star_df['question_id'].str.startswith('Pre'))[0]

print(f"Interaction type size: {len(inter_pos)}")
print(f"Sequence type size: {len(seq_pos)}")
print(f"Feasibility type size: {len(feas_pos)}")
print(f"Prediction type size: {len(pred_pos)}")


Interaction type size: 16303
Sequence type size: 22262
Feasibility type size: 3011
Prediction type size: 4155


In [101]:
inter_subset = np.random.choice(inter_pos, 300, replace=False)
seq_subset = np.random.choice(seq_pos, 300, replace=False)
feas_subset = np.random.choice(feas_pos, 300, replace=False)
pred_subset = np.random.choice(pred_pos, 300, replace=False)


In [102]:
subset_1200 = np.concat([inter_subset, seq_subset, feas_subset, pred_subset])
np.random.shuffle(subset_1200)
subset_1200.shape


(1200,)

In [103]:
o = star_df.iloc[subset_1200].to_dict('records')
len(o)


1200

In [104]:
with open(WORK_DIR / 'data/datasets/STAR/STAR_annotations/STAR_val_small_1200bis.json', 'w') as f:
    json.dump(o, f, indent=2)


In [105]:
a = pd.DataFrame()
a['start']  = star_df.groupby('video_id')['start'].agg('min')
a['end'] = star_df.groupby('video_id')['start'].agg('max')
a


Unnamed: 0_level_0,start,end
video_id,Unnamed: 1_level_1,Unnamed: 2_level_1
00607,0.0,25.9
00MFE,0.0,14.7
00N38,0.7,14.3
00T1E,0.0,0.7
00X3U,9.7,14.9
...,...,...
ZXN6A,0.0,23.8
ZY2B1,5.4,16.6
ZZ3HT,1.2,6.8
ZZ9RN,1.2,12.3


In [122]:
a = star_df.groupby(['video_id', 'start', 'end'])['question_id'].count()

print(f'Unique videos: {(a > 9).sum()}')
print(f'Question associated: {a[a > 9].sum()}')


Unique videos: 180
Question associated: 1114


In [123]:
res = star_df.set_index(['video_id', 'start', 'end']).loc[a > 9].reset_index()
res['question_id'].apply(lambda x: x[:3]).value_counts()


question_id
Int    801
Seq    245
Fea     68
Name: count, dtype: int64

In [112]:
res


Unnamed: 0,video_id,start,end,question_id,question,answer,question_program,choices,situations
0,YE47Y,5.6,11.2,Interaction_T1_330,Which object was taken by the person?,The phone/camera.,"[{'function': 'Situations', 'value_input': []}...","[{'choice_id': 0, 'choice': 'The clothes.', 'c...","{'000172': {'rel_pairs': [['o000', 'o021'], ['..."
1,YE47Y,3.8,8.3,Interaction_T1_331,Which object was taken by the person?,The phone/camera.,"[{'function': 'Situations', 'value_input': []}...","[{'choice_id': 0, 'choice': 'The cup/glass/bot...","{'000128': {'rel_pairs': [['o000', 'o021'], ['..."
2,YE47Y,7.7,12.3,Interaction_T1_332,Which object was put down by the person?,The pillow.,"[{'function': 'Situations', 'value_input': []}...","[{'choice_id': 0, 'choice': 'The cup/glass/bot...","{'000236': {'rel_pairs': [['o000', 'o021'], ['..."
3,FVINY,25.1,29.6,Interaction_T1_397,Which object was put down by the person?,The box.,"[{'function': 'Situations', 'value_input': []}...","[{'choice_id': 0, 'choice': 'The laptop.', 'ch...","{'000756': {'rel_pairs': [['o000', 'o017'], ['..."
4,PJDUN,0.0,4.3,Interaction_T1_471,Which object was lied on by the person?,The bed.,"[{'function': 'Situations', 'value_input': []}...","[{'choice_id': 0, 'choice': 'The bed.', 'choic...","{'000024': {'rel_pairs': [['o000', 'o018'], ['..."
...,...,...,...,...,...,...,...,...,...
869,SSKK6,0.0,5.2,Feasibility_T6_1021,What is the person able to do after sitting at...,Throw the bag.,"[{'function': 'Situations', 'value_input': []}...","[{'choice_id': 0, 'choice': 'Throw the bag.', ...","{'000013': {'rel_pairs': [['o000', 'o028'], ['..."
870,Z7UOT,14.3,21.2,Feasibility_T6_1135,What is the person able to do after putting do...,Take the box.,"[{'function': 'Situations', 'value_input': []}...","[{'choice_id': 0, 'choice': 'Wash the dish.', ...","{'000739': {'rel_pairs': [['o000', 'o015'], ['..."
871,WFD5Z,6.5,11.7,Feasibility_T6_1274,What is the person able to do after closing th...,Eat the sandwich.,"[{'function': 'Situations', 'value_input': []}...","[{'choice_id': 0, 'choice': 'Eat the sandwich....","{'000221': {'rel_pairs': [['o000', 'o006'], ['..."
872,3DLKC,0.0,5.6,Feasibility_T6_1277,What is the person able to do after opening th...,Take the towel.,"[{'function': 'Situations', 'value_input': []}...","[{'choice_id': 0, 'choice': 'Take the towel.',...","{'000085': {'rel_pairs': [['o000', 'o021'], ['..."
