In [1]:
import json
import math
import os
import random
import re
import shutil
import time

import pandas as pd
import pysrt
import yaml
from openai import OpenAI, RateLimitError

In [2]:
# Load the OpenAI API key from the environment instead of hard-coding it in the notebook.
# Get your API key from here: https://platform.openai.com/account/api-keys
api_key = os.getenv('OPENAI_API_KEY', "")
if not api_key:
    # Fail early with a clear message rather than at the first API request.
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it before starting the notebook kernel."
    )
# Pass the key explicitly so the client does not depend on class-level state.
client = OpenAI(api_key=api_key)

In [3]:

def query_llm(MODEL, prompt, client, max_retries=5, backoff_seconds=2.0):
    """Send a single-turn chat prompt to a chat-completions client and return the reply text.

    Args:
        MODEL: Model name forwarded to ``client.chat.completions.create``.
        prompt: Content of the user message.
        client: Object exposing ``chat.completions.create`` (e.g. an ``OpenAI`` client).
        max_retries: Number of retries after a ``RateLimitError`` before giving up.
        backoff_seconds: Base delay in seconds; the wait doubles after each failed attempt.

    Returns:
        The ``content`` of the first choice's message.

    Raises:
        RateLimitError: If the request is still rate limited after ``max_retries`` retries.
    """
    attempt = 0
    while True:
        try:
            response = client.chat.completions.create(
                model=MODEL,
                messages=[
                    {"role": "system", "content": "You are a helpful assistant."},
                    {"role": "user", "content": prompt},
                ],
                # Fixed sampling settings and seed, intended to keep replies repeatable.
                temperature=0,
                top_p=1,
                seed=123,
            )
            return response.choices[0].message.content
        except RateLimitError as e:
            # Previously this branch only printed and then fell through to
            # `return response`, which raised UnboundLocalError.
            print(f"Error 429: {e}")
            if attempt >= max_retries:
                raise
            time.sleep(backoff_seconds * (2 ** attempt))
            attempt += 1

In [4]:
# Name of this experiment; used as a sub-folder under the JSON save folder when the sampled splits are written.
experiment_name = "tvqa_modified_script_sampled_decompose_query_exp"

In [5]:
# Placeholder for the LLM prompt text; the prompt actually sent is built per question in the decomposition cell below.
prompt = ""

In [6]:
#select queries  

In [7]:
def save_json(content, save_path):
    """Serialize ``content`` to a JSON string and write it to ``save_path`` (overwriting)."""
    with open(save_path, 'w') as out_file:
        serialized = json.dumps(content)
        out_file.write(serialized)
def load_jsonl(filename):
    """Read a JSON-lines file and return a list with one parsed object per non-blank line.

    Blank or whitespace-only lines (for example a trailing empty line) are skipped
    instead of raising ``json.JSONDecodeError``.
    """
    with open(filename, "r") as f:
        # Iterate the file lazily; json.loads tolerates the trailing newline.
        return [json.loads(line) for line in f if line.strip()]
def load_result_json(filename):
    """Parse the JSON document stored in ``filename``, print it, and return it."""
    with open(filename, "r") as result_file:
        parsed = json.loads(result_file.read())
    print(parsed)
    return parsed

In [8]:
# Folder holding the TVQA split files.
vid_json_folder = "/home/hlpark/REDUCE/REDUCE_benchmarks/HiREST/data/splits/tvqa"
# Each split file is read line by line with load_jsonl, so `val` and `test` are lists of
# parsed objects; later cells only use element [0] of each.
val = load_jsonl(f'{vid_json_folder}/all_data_val.json')
test = load_jsonl(f'{vid_json_folder}/all_data_test.json')

In [9]:
# Text files with one medical query per line for the test and val splits.
test_med_queries_path = "/home/hlpark/shared/TVQA/output_gpt_tvqa_test_queries.txt"
val_med_queries_path = "/home/hlpark/shared/TVQA/output_gpt_tvqa_val_queries.txt"

# Characters dropped when normalizing a query line: space ? newline - ' " , . / >
_QUERY_LINE_STRIP_TABLE = str.maketrans("", "", " ?\n-'\",./>")


def _normalize_query_line(line):
    """Lower-case ``line`` and remove whitespace/punctuation so queries can be compared loosely."""
    return line.lower().translate(_QUERY_LINE_STRIP_TABLE)


def _load_normalized_queries(path):
    """Return the normalized form of every non-empty line of the file at ``path``."""
    # The `with` block closes the file; no explicit close() is needed.
    with open(path, "r") as f:
        # Leading "-" / " " need no separate stripping: normalization removes
        # every "-" and " " in the line anyway.
        return [_normalize_query_line(line) for line in f.read().split("\n") if line != ""]


test_med_query_f_metamap_list = _load_normalized_queries(test_med_queries_path)
val_med_query_f_metamap_list = _load_normalized_queries(val_med_queries_path)
print(len(test_med_query_f_metamap_list), len(val_med_query_f_metamap_list))

246 569


In [10]:
# Characters dropped when normalizing a query key: space ? newline - ' " > , . /
_QUERY_KEY_STRIP_TABLE = str.maketrans("", "", " ?\n-'\">,./")


def _normalize_query_key(query):
    """Lower-case ``query`` and remove whitespace/punctuation for loose matching."""
    return query.lower().translate(_QUERY_KEY_STRIP_TABLE)


def _split_med_nonmed(split_data, med_query_keys):
    """Group the queries of one split into medical and non-medical lists.

    Args:
        split_data: Dict mapping a query string to a dict whose first key is the
            video file name; that entry must contain a ``'bounds'`` value.
        med_query_keys: Iterable of normalized medical query strings.

    Returns:
        ``{'med': [...], 'nonmed': [...]}`` where every item is
        ``[query, video name without ".mp4", bounds]``.
    """
    med_lookup = set(med_query_keys)  # O(1) membership instead of scanning a list
    grouped = {'med': [], 'nonmed': []}
    for key, value in split_data.items():
        vid = next(iter(value))
        group = 'med' if _normalize_query_key(key) in med_lookup else 'nonmed'
        grouped[group].append([key, vid.replace(".mp4", ""), value[vid]['bounds']])
    return grouped


test_queries = _split_med_nonmed(test[0], test_med_query_f_metamap_list)
med_total, nonmed_total = len(test_queries['med']), len(test_queries['nonmed'])
print(med_total, nonmed_total)

val_queries = _split_med_nonmed(val[0], val_med_query_f_metamap_list)
med_total, nonmed_total = len(val_queries['med']), len(val_queries['nonmed'])
print(med_total, nonmed_total)

246 7372
569 14675


In [11]:
# File with the video durations for the TVQA split.
vid_duration_json = "/home/hlpark/REDUCE/REDUCE_benchmarks/HiREST/data/splits/tvqa/video_duration.json"
# load_jsonl returns one parsed object per line; the first one is used as the lookup
# (indexed later by "<video name>.mp4").
video_duration_dict = load_jsonl(vid_duration_json)[0]

In [14]:
# Randomly sample 200 medical and 200 non-medical queries from each split, then ask the
# LLM to decompose every sampled question into simple single-event sentences.
# NOTE: renaming videos that have several queries (appending v1, v2, ...) is not done here.

N_SAMPLES_PER_GROUP = 200
DECOMPOSE_MODEL = "gpt-4-0125-preview"
DECOMPOSE_PROMPT_PREFIX = "You are given a question, '"
DECOMPOSE_PROMPT_SUFFIX = (
    "'  Decompose the question into several sentences if sentences contain temporal, causal relations such as 'after', 'before', 'when'."
    " The output sentences should be simple containing a single event in a sentence."
    " Do not output explanation and only give decomposed sentences."
    " Output format should be '1. example output sentence 1.\n 2. example output sentence 2.\n'"
)

# Matches only a leading list number such as "1. " or " 2. ", so digits followed by a
# dot inside the sentence (e.g. "101.5") are left alone.
_LIST_NUMBER_RE = re.compile(r"^\s*\d+\.\s*")


def parse_decomposed_sentences(reply):
    """Split an LLM reply into its sentences, dropping list numbers and blank lines.

    Args:
        reply: Reply text with one (optionally numbered) sentence per line; may be
            empty or ``None``.

    Returns:
        List of non-empty sentences in reply order.
    """
    if not reply:
        return []
    sentences = []
    for line in reply.split("\n"):
        sentence = _LIST_NUMBER_RE.sub("", line, count=1).strip()
        if sentence:
            sentences.append(sentence)
    return sentences


def build_decomposed_queries(queries, sample_idx):
    """Decompose the sampled questions and build the per-sentence annotation dict.

    Args:
        queries: List of ``[question, video name, bounds]`` entries.
        sample_idx: Indices into ``queries`` to process.

    Returns:
        ``(sampled, n_sentences)``. ``sampled`` maps each decomposed sentence to
        ``{"<video>.mp4": {...}, 'original_qa': question}``; ``n_sentences`` counts
        all sentences produced. Identical sentences overwrite each other, so
        ``len(sampled)`` can be smaller than ``n_sentences``.
    """
    sampled = {}
    n_sentences = 0
    for idx in sample_idx:
        question, video_name, bounds = queries[idx]
        video_id = video_name + ".mp4"
        qa_dict = {
            video_id: {
                'relevant': True,
                'clip': True,
                'bounds': bounds,
                'steps': [],
                'v_duration': video_duration_dict[video_id],
            }
        }
        prompt = DECOMPOSE_PROMPT_PREFIX + question + DECOMPOSE_PROMPT_SUFFIX

        print(prompt + "\n")
        reply = query_llm(DECOMPOSE_MODEL, prompt, client)
        print(reply)

        for sentence in parse_decomposed_sentences(reply):
            # Shallow copy: the per-video entry is shared, only 'original_qa' is added on top.
            temp_qa_dict = qa_dict.copy()
            temp_qa_dict['original_qa'] = question
            sampled[sentence] = temp_qa_dict
            n_sentences += 1
    return sampled, n_sentences


test_med_len = len(test_queries['med'])
test_nonmed_len = len(test_queries['nonmed'])
val_med_len = len(val_queries['med'])
val_nonmed_len = len(val_queries['nonmed'])

# Draw all four samples right after seeding, in this fixed order, so the selection is reproducible.
random.seed(9)
test_med_idx = random.sample(range(test_med_len), k=N_SAMPLES_PER_GROUP)
test_nonmed_idx = random.sample(range(test_nonmed_len), k=N_SAMPLES_PER_GROUP)
val_med_idx = random.sample(range(val_med_len), k=N_SAMPLES_PER_GROUP)
val_nonmed_idx = random.sample(range(val_nonmed_len), k=N_SAMPLES_PER_GROUP)

test_med_sampled_queries, cnt = build_decomposed_queries(test_queries['med'], test_med_idx)
print(len(test_med_sampled_queries), cnt)

test_nonmed_sampled_queries, cnt = build_decomposed_queries(test_queries['nonmed'], test_nonmed_idx)
print(len(test_nonmed_sampled_queries), cnt)

val_med_sampled_queries, cnt = build_decomposed_queries(val_queries['med'], val_med_idx)
print(len(val_med_sampled_queries), cnt)

val_nonmed_sampled_queries, cnt = build_decomposed_queries(val_queries['nonmed'], val_nonmed_idx)
print(len(val_nonmed_sampled_queries), cnt)

You are given a question, 'Who still insists on a scratch test after House gives his diagnosis on the woman's allergies?'  Decompose the question into several sentences if sentences contain temporal, causal relations such as 'after', 'before', 'when'. The output sentences should be simple containing a single event in a sentence. Do not output explanation and only give decomposed sentences. Output format should be '1. example output sentence 1.
 2. example output sentence 2.
'

1. House gives his diagnosis on the woman's allergies.
2. Someone insists on a scratch test.
You are given a question, 'What does House do after Ali tell him she caught the Rhino thing her dad had?'  Decompose the question into several sentences if sentences contain temporal, causal relations such as 'after', 'before', 'when'. The output sentences should be simple containing a single event in a sentence. Do not output explanation and only give decomposed sentences. Output format should be '1. example output sente

In [15]:
# Create one folder per experiment variant and write the four sampled query sets into it
# (ASR features have to be kept in separate folders as well).
json_save_folder = "/home/hlpark/REDUCE/REDUCE_benchmarks/HiREST/data/splits"

exp_list = ['baseline', 'visual_med', 'visual_med_with_audio', 'visual_nonmed', 'visual_nonmed_with_audio', 'full', 'full_without_audio']

# (sub-folder, file name, sampled queries) written for every experiment variant.
split_outputs = [
    ("medical_test", "all_data_test.json", test_med_sampled_queries),
    ("nonmedical_test", "all_data_test.json", test_nonmed_sampled_queries),
    ("medical_val", "all_data_val.json", val_med_sampled_queries),
    ("nonmedical_val", "all_data_val.json", val_nonmed_sampled_queries),
]

for exp in exp_list:
    json_folder = os.path.join(json_save_folder, experiment_name, exp)

    for split_name, file_name, sampled_queries in split_outputs:
        split_folder = os.path.join(json_folder, split_name)
        # exist_ok avoids the check-then-create pattern and also creates missing parents.
        os.makedirs(split_folder, exist_ok=True)
        save_json(sampled_queries, os.path.join(split_folder, file_name))
