We are working with a sample of MSR-VTT and our evaluation pipeline supports only one relevant query per video, hence we need to filter the full FIRE benchmark to only include videos we have sampled and ones with a single relevant result.

In [1]:
FIRE_MSRVTT_RAW = "data/FIRE/fire-data/fire_msrvtt_dataset.json"
FIRE_BENCHMARK_Q_JUDGEMENTS = "./fire_benchmark_q_judgements.csv"

In [3]:
import json
import pandas as pd

def open_json(path):
    with open(path) as f:
        data = json.load(f)
    return data

fire_data = open_json(FIRE_MSRVTT_RAW)
fire_judgements_raw_df = pd.DataFrame(fire_data["annotations"])
fire_judgements_raw_df

Unnamed: 0,annotator_ids,label,video_id,query,annotator_labels,response_ids,queue_ids,models,dataset
0,[84],irrelevant,video7026,a 3d animation of a cabinet with plates,[irrelevant],[514705627045591],[4985131491553502],[CE],MSRVTT
1,[62],irrelevant,video7418,a 3d animation of a cabinet with plates,[irrelevant],[1009913593254130],[4985131491553502],[CE],MSRVTT
2,[50],irrelevant,video7763,a 3d animation of a cabinet with plates,[irrelevant],[3280646372150702],[4995314980504271],[SSB],MSRVTT
3,[67],relevant,video7768,a 3d animation of a cabinet with plates,[relevant],[1014281349443178],[5083898558315210],[CLIP4CLIP],MSRVTT
4,[55],irrelevant,video7822,a 3d animation of a cabinet with plates,[irrelevant],[2619512781525857],[4995314980504271],[SSB],MSRVTT
...,...,...,...,...,...,...,...,...,...
24162,[69],irrelevant,video9770,young people sit around the edges of a room cl...,[irrelevant],[310721911172645],[4995314980504271],[SSB],MSRVTT
24163,[40],irrelevant,video9788,young people sit around the edges of a room cl...,[irrelevant],[5180951918610413],[4985131491553502],[CE],MSRVTT
24164,[36],irrelevant,video9814,young people sit around the edges of a room cl...,[irrelevant],[498969301872125],[4985131491553502],[CE],MSRVTT
24165,[57],irrelevant,video9815,young people sit around the edges of a room cl...,[irrelevant],[1154835725293034],[5083898558315210],[CLIP4CLIP],MSRVTT


In [4]:
def get_cleaned_fire_judgements(raw_fire_annotations):
    all_queries = [a["query"] for a in raw_fire_annotations]
    print("Total queries", len(all_queries))
    unique_queries = list(set(all_queries))
    print("Unique queries", len(unique_queries))
    
    # Initialize lists to store relevant and non-relevant IDs
    relevant_ids = []
    non_relevant_ids = []
    
    # Iterate through the first queries and categorize the IDs
    for query in unique_queries:
        relevant = []
        non_relevant = []
        for annotation in raw_fire_annotations:
            if annotation["query"] == query:
                if annotation["label"] == "relevant":
                    relevant.append(annotation["video_id"])
                else:
                    non_relevant.append(annotation["video_id"])
        relevant_ids.append(relevant)
        non_relevant_ids.append(non_relevant)
        

    return pd.DataFrame({
    "query": unique_queries,
    "relevant_ids": relevant_ids,
    "non_relevant_ids": non_relevant_ids
    }).copy()
    

In [6]:
fire_judgements_df = get_cleaned_fire_judgements(fire_data["annotations"])
print("Total queries", len(fire_judgements_df))
fire_judgements_df.head()

Total queries 24167
Unique queries 995
Total queries 995


Unnamed: 0,query,relevant_ids,non_relevant_ids
0,a person is singing in a studio,"[video7205, video7598, video8818]","[video7156, video7213, video7367, video7575, v..."
1,a news reporter talks about a shooting,"[video8822, video8864, video9204]","[video7178, video7346, video7593, video7919, v..."
2,sports vine clips of football,"[video7358, video7787, video7839, video8913, v...","[video7029, video7373, video7548, video7698, v..."
3,sygornie weaver pointing a gun and then running,[video9784],"[video7207, video7362, video7368, video7545, v..."
4,a girl walking down a path,"[video7467, video9451, video9521]","[video7372, video7556, video7747, video7837, v..."


In [7]:
# Filter only queries with a single relevant result
fire_judgements_single_result = fire_judgements_df[fire_judgements_df["relevant_ids"].apply(lambda x: len(x) == 1)]
fire_judgements_single_result

Unnamed: 0,query,relevant_ids,non_relevant_ids
3,sygornie weaver pointing a gun and then running,[video9784],"[video7207, video7362, video7368, video7545, v..."
6,a man a woman cooking on a cooking show,[video8662],"[video7141, video7415, video7542, video7572, v..."
12,a guy is talking over loud music at the end of...,[video7028],"[video7142, video7375, video7412, video7575, v..."
17,a lady describes about workout and exercises f...,[video7724],"[video7134, video7145, video7170, video7368, v..."
20,a video of a young man in a white shirt inviti...,[video7469],"[video7118, video7131, video7169, video7345, v..."
...,...,...,...
978,a woman feeds another,[video8345],"[video7141, video7349, video7372, video7500, v..."
981,a man gets shot in the face,[video8249],"[video7118, video7131, video7352, video7597, v..."
984,a girl explains about some studies showing som...,[video7577],"[video7144, video7216, video7231, video7580, v..."
987,a person folds a paper airplane,[video8688],"[video7363, video7415, video7418, video7829, v..."


In [9]:
# Filter to include queries whose result is in our 1k video sample
MSRVTT_SAMPLES = "./MSRVTT_1K.csv"
raw_samples_df = pd.read_csv(MSRVTT_SAMPLES)
video_ids_in_sample = set(raw_samples_df["video_id"].apply(lambda x: int(x[len("video"):])))
final_fire_jud = fire_judgements_single_result[fire_judgements_single_result["relevant_ids"].apply(lambda x: int(x[0][len("video"):]) in video_ids_in_sample)]
final_fire_jud['video_id'] = final_fire_jud['relevant_ids'].apply(lambda x: x[0])
final_fire_jud['sentence'] = final_fire_jud['query']
final_fire_jud

Unnamed: 0,query,relevant_ids,non_relevant_ids,video_id,sentence
3,sygornie weaver pointing a gun and then running,[video9784],"[video7207, video7362, video7368, video7545, v...",video9784,sygornie weaver pointing a gun and then running
6,a man a woman cooking on a cooking show,[video8662],"[video7141, video7415, video7542, video7572, v...",video8662,a man a woman cooking on a cooking show
12,a guy is talking over loud music at the end of...,[video7028],"[video7142, video7375, video7412, video7575, v...",video7028,a guy is talking over loud music at the end of...
17,a lady describes about workout and exercises f...,[video7724],"[video7134, video7145, video7170, video7368, v...",video7724,a lady describes about workout and exercises f...
20,a video of a young man in a white shirt inviti...,[video7469],"[video7118, video7131, video7169, video7345, v...",video7469,a video of a young man in a white shirt inviti...
...,...,...,...,...,...
978,a woman feeds another,[video8345],"[video7141, video7349, video7372, video7500, v...",video8345,a woman feeds another
981,a man gets shot in the face,[video8249],"[video7118, video7131, video7352, video7597, v...",video8249,a man gets shot in the face
984,a girl explains about some studies showing som...,[video7577],"[video7144, video7216, video7231, video7580, v...",video7577,a girl explains about some studies showing som...
987,a person folds a paper airplane,[video8688],"[video7363, video7415, video7418, video7829, v...",video8688,a person folds a paper airplane


In [10]:
# Save the final DataFrame to a CSV file
final_fire_jud.to_csv(FIRE_BENCHMARK_Q_JUDGEMENTS, index=False) 
print("Final FIRE benchmark judgements saved to", FIRE_BENCHMARK_Q_JUDGEMENTS)

Final FIRE benchmark judgements saved to ./fire_benchmark_q_judgements.csv
