In [38]:
import os
import csv
from towhee import ops, pipe, register
from towhee.operator import PyOperator
from towhee import DataCollection
from tqdm import tqdm
import pandas as pd
import json
import numpy as np
from helpers import milvus_utils
from helpers import eval_utils as my_eval_utils

In [40]:
# CONSTANTS
# CSV consumed by both the loader and the searcher pipelines; the columns
# read from it in this notebook are video_id, video_path and sentence.
MSRVTT_SAMPLES = "./MSRVTT_1K.csv"
# Prefix of the per-experiment Milvus collection names; the frame-count value
# under test is appended to it (e.g. "msrvtt_vid_hyp_tun_4").
VIDEO_RET_TUNING_COLLECTION_PREFIX = "msrvtt_vid_hyp_tun_"

In [41]:
# Read the sample sheet and peek at the three columns the pipelines consume.
raw_samples_df = pd.read_csv(MSRVTT_SAMPLES)
raw_samples_df.loc[:, ['video_id', 'video_path', 'sentence']].head()

Unnamed: 0,video_id,video_path,sentence
0,video7579,./test_1k_compress/video7579.mp4,a girl wearing red top and black trouser is pu...
1,video7725,./test_1k_compress/video7725.mp4,young people sit around the edges of a room cl...
2,video9258,./test_1k_compress/video9258.mp4,a person is using a phone
3,video7365,./test_1k_compress/video7365.mp4,cartoon people are eating at a restaurant
4,video8068,./test_1k_compress/video8068.mp4,a woman on a couch talks to a a man


In [43]:
def create_loader_pipeline(uts_value, milvus_col_name, device='mps'):
    """Build a towhee pipeline that embeds the videos listed in a CSV and inserts them into Milvus.

    Args:
        uts_value: Number of frames sampled per video; forwarded as
            ``num_samples`` to the ``uniform_temporal_subsample`` decoder.
        milvus_col_name: Name of the Milvus collection that receives the
            ``(video_id, vec)`` pairs.
        device: Device string forwarded to the CLIP4Clip operator. Defaults to
            ``'mps'`` to match the machine this notebook was written on; pass
            whichever device the operator supports on yours.

    Returns:
        The pipeline object. It takes a CSV path as input (``csv_file``) and
        outputs the ``video_id`` column.
    """
    print(f"Creating loader pipeline for {uts_value} with collection {milvus_col_name}")

    def read_loader_csv(csv_file):
        # 'utf-8-sig' drops a leading BOM so the first header name is clean;
        # newline='' is how the csv module expects its files to be opened.
        with open(csv_file, 'r', encoding='utf-8-sig', newline='') as f:
            for row in csv.DictReader(f):
                # 'video7579' -> 7579: the numeric suffix is the id that gets
                # inserted into Milvus alongside the embedding.
                yield int(row['video_id'][len('video'):]), row['video_path']

    video_loader_pipeline = (
        pipe.input('csv_file')
        .flat_map('csv_file', ('video_id', 'video_path'), read_loader_csv)
        # Sample `uts_value` evenly spaced frames from each video
        .map('video_path', 'frames',
             ops.video_decode.ffmpeg(sample_type='uniform_temporal_subsample',
                                     args={'num_samples': uts_value}))
        # Embed the sampled frames with CLIP4Clip on the requested device
        .map('frames', 'vec',
             ops.video_text_embedding.clip4clip(model_name='clip_vit_b32',
                                                modality='video', device=device))
        .map(('video_id', 'vec'), (), ops.ann_insert.milvus_client(collection_name=milvus_col_name))
        .output('video_id')
    )
    return video_loader_pipeline

def create_searcher_pipeline(milvus_col_name, device='mps'):
    """Build a towhee pipeline that embeds each caption in a CSV and searches Milvus with it.

    Args:
        milvus_col_name: Name of the Milvus collection to search.
        device: Device string forwarded to the CLIP4Clip text encoder.
            Defaults to ``'mps'`` to match the machine this notebook was
            written on; pass whichever device the operator supports on yours.

    Returns:
        The pipeline object. It takes a CSV path as input (``csv_file``) and
        outputs ``rel_video_id``, ``query``, ``top1``, ``top5`` and ``top10``,
        where the three ``top*`` columns are prefixes of the same 10-hit
        search result.
    """
    print(f"Creating searcher pipeline with collection {milvus_col_name}")

    def read_video_search_csv(csv_file):
        # 'utf-8-sig' drops a leading BOM so the first header name is clean;
        # newline='' is how the csv module expects its files to be opened.
        with open(csv_file, 'r', encoding='utf-8-sig', newline='') as f:
            for row in csv.DictReader(f):
                # video_id is passed through unchanged as the relevant video
                # for this caption.
                yield row['video_id'], row['sentence']

    video_search_pipeline = (
        pipe.input('csv_file')
        .flat_map('csv_file', ('rel_video_id', 'query'), read_video_search_csv)
        .map('query', 'vec',
             ops.video_text_embedding.clip4clip(model_name='clip_vit_b32',
                                                modality='text', device=device))
        .map('vec', 'top10_raw_res',
             ops.ann_search.milvus_client(collection_name=milvus_col_name, limit=10))
        # One search of 10 hits is sliced into the three cut-offs used later.
        .map('top10_raw_res', ('top1', 'top5', 'top10'), lambda x: (x[:1], x[:5], x[:10]))
        .output('rel_video_id', 'query', 'top1', 'top5', 'top10')
    )
    return video_search_pipeline
        

def load_and_query_c4c(uts_values):
    """Run the load-then-search experiment once per frame-count setting.

    For every value in ``uts_values`` a dedicated Milvus collection is
    created, the videos listed in ``MSRVTT_SAMPLES`` are embedded with that
    many sampled frames and inserted, and then every caption in the same CSV
    is used to query the collection.

    Args:
        uts_values: Iterable of frame counts to try. Each value is also the
            suffix of that experiment's collection name.

    Returns:
        Dict mapping each frame count to the search results for that
        experiment, as produced by ``my_eval_utils.twohee_data_col_to_df``.
    """
    # NOTE(review): presumably the CLIP ViT-B/32 embedding size — confirm
    # against the model before changing `model_name` in the pipelines.
    embedding_dim = 512

    # (1) Create a new collection for each experiment
    milvus_cols = {uts_val: VIDEO_RET_TUNING_COLLECTION_PREFIX + str(uts_val)
                   for uts_val in uts_values}
    print(f"Creating collections: {list(milvus_cols.values())}")
    for m_col_name in milvus_cols.values():
        milvus_utils.create_milvus_collection(m_col_name, embedding_dim)

    # (2) Create a loader and a searcher pipeline for each experiment
    loader_pipelines = {}
    searcher_pipelines = {}
    for uv, m_col_name in milvus_cols.items():
        print(f"Creating loader pipeline for {uv}")
        loader_pipelines[uv] = create_loader_pipeline(uv, m_col_name)
        searcher_pipelines[uv] = create_searcher_pipeline(m_col_name)

    # (3) Call each loader pipeline
    for uv, m_col_name in milvus_cols.items():
        print(f"Loading data into {m_col_name}")
        # Named `loader_pipe` so the imported towhee `pipe` is not shadowed.
        loader_pipe = loader_pipelines[uv]
        loader_pipe(MSRVTT_SAMPLES)
        print(f"Finished loading data into {m_col_name}")

    # (4) Call each searcher pipeline and convert its output to a DataFrame
    search_results_dfs = {}
    for uv, m_col_name in milvus_cols.items():
        print(f"Searching data in {m_col_name}")
        search_pipe = searcher_pipelines[uv]
        search_dc = DataCollection(search_pipe(MSRVTT_SAMPLES))
        search_results_dfs[uv] = my_eval_utils.twohee_data_col_to_df(search_dc)
        print(f"Finished searching data in {m_col_name}")

    return search_results_dfs

In [44]:
# Run the full load-then-search experiment for frame counts 4, 6, 8, 10 and 12.
experiment_query_results = load_and_query_c4c(list(range(4, 13, 2)))

Creating collections: ['msrvtt_vid_hyp_tun_4', 'msrvtt_vid_hyp_tun_6', 'msrvtt_vid_hyp_tun_8', 'msrvtt_vid_hyp_tun_10', 'msrvtt_vid_hyp_tun_12']
Creating loader pipeline for 4
Creating loader pipeline for 4 with collection msrvtt_vid_hyp_tun_4
Creating searcher pipeline with collection msrvtt_vid_hyp_tun_4
Creating loader pipeline for 6
Creating loader pipeline for 6 with collection msrvtt_vid_hyp_tun_6
Creating searcher pipeline with collection msrvtt_vid_hyp_tun_6
Creating loader pipeline for 8
Creating loader pipeline for 8 with collection msrvtt_vid_hyp_tun_8
Creating searcher pipeline with collection msrvtt_vid_hyp_tun_8
Creating loader pipeline for 10
Creating loader pipeline for 10 with collection msrvtt_vid_hyp_tun_10
Creating searcher pipeline with collection msrvtt_vid_hyp_tun_10
Creating loader pipeline for 12
Creating loader pipeline for 12 with collection msrvtt_vid_hyp_tun_12
Creating searcher pipeline with collection msrvtt_vid_hyp_tun_12


2025-04-17 16:56:40,676 - 14023471104 - node.py-node:167 - INFO: Begin to run Node-_input
2025-04-17 16:56:40,680 - 14040297472 - node.py-node:167 - INFO: Begin to run Node-read_loader_csv-0
2025-04-17 16:56:40,681 - 14023471104 - node.py-node:167 - INFO: Begin to run Node-video-decode/ffmpeg-1
2025-04-17 16:56:40,681 - 14057123840 - node.py-node:167 - INFO: Begin to run Node-video-text-embedding/clip4clip-2
2025-04-17 16:56:40,681 - 14073950208 - node.py-node:167 - INFO: Begin to run Node-ann-insert/milvus-client-3
2025-04-17 16:56:40,682 - 14090776576 - node.py-node:167 - INFO: Begin to run Node-_output


Loading data into msrvtt_vid_hyp_tun_4


2025-04-17 16:57:32,722 - 26208137216 - node.py-node:167 - INFO: Begin to run Node-_input
2025-04-17 16:57:32,722 - 26224963584 - node.py-node:167 - INFO: Begin to run Node-read_loader_csv-0
2025-04-17 16:57:32,723 - 26241789952 - node.py-node:167 - INFO: Begin to run Node-video-decode/ffmpeg-1
2025-04-17 16:57:32,723 - 26258616320 - node.py-node:167 - INFO: Begin to run Node-video-text-embedding/clip4clip-2
2025-04-17 16:57:32,723 - 26208137216 - node.py-node:167 - INFO: Begin to run Node-ann-insert/milvus-client-3
2025-04-17 16:57:32,724 - 26323890176 - node.py-node:167 - INFO: Begin to run Node-_output


Finished loading data into msrvtt_vid_hyp_tun_4
Loading data into msrvtt_vid_hyp_tun_6


2025-04-17 16:58:28,509 - 20865642496 - node.py-node:167 - INFO: Begin to run Node-_input
2025-04-17 16:58:28,510 - 20966305792 - node.py-node:167 - INFO: Begin to run Node-read_loader_csv-0
2025-04-17 16:58:28,511 - 20992520192 - node.py-node:167 - INFO: Begin to run Node-video-decode/ffmpeg-1
2025-04-17 16:58:28,512 - 25954381824 - node.py-node:167 - INFO: Begin to run Node-video-text-embedding/clip4clip-2
2025-04-17 16:58:28,512 - 20865642496 - node.py-node:167 - INFO: Begin to run Node-ann-insert/milvus-client-3
2025-04-17 16:58:28,512 - 25971208192 - node.py-node:167 - INFO: Begin to run Node-_output


Finished loading data into msrvtt_vid_hyp_tun_6
Loading data into msrvtt_vid_hyp_tun_8


2025-04-17 16:59:30,335 - 26885517312 - node.py-node:167 - INFO: Begin to run Node-_input
2025-04-17 16:59:30,336 - 26915942400 - node.py-node:167 - INFO: Begin to run Node-read_loader_csv-0
2025-04-17 16:59:30,336 - 26932768768 - node.py-node:167 - INFO: Begin to run Node-video-decode/ffmpeg-1
2025-04-17 16:59:30,337 - 26949595136 - node.py-node:167 - INFO: Begin to run Node-video-text-embedding/clip4clip-2
2025-04-17 16:59:30,337 - 26885517312 - node.py-node:167 - INFO: Begin to run Node-ann-insert/milvus-client-3
2025-04-17 16:59:30,337 - 26966421504 - node.py-node:167 - INFO: Begin to run Node-_output


Finished loading data into msrvtt_vid_hyp_tun_8
Loading data into msrvtt_vid_hyp_tun_10


2025-04-17 17:00:49,228 - 30244630528 - node.py-node:167 - INFO: Begin to run Node-_input
2025-04-17 17:00:49,229 - 30261456896 - node.py-node:167 - INFO: Begin to run Node-read_loader_csv-0
2025-04-17 17:00:49,229 - 30278283264 - node.py-node:167 - INFO: Begin to run Node-video-decode/ffmpeg-1
2025-04-17 17:00:49,230 - 30295109632 - node.py-node:167 - INFO: Begin to run Node-video-text-embedding/clip4clip-2
2025-04-17 17:00:49,231 - 30311936000 - node.py-node:167 - INFO: Begin to run Node-ann-insert/milvus-client-3
2025-04-17 17:00:49,231 - 30244630528 - node.py-node:167 - INFO: Begin to run Node-_output


Finished loading data into msrvtt_vid_hyp_tun_10
Loading data into msrvtt_vid_hyp_tun_12


2025-04-17 17:02:01,395 - 31135330304 - node.py-node:167 - INFO: Begin to run Node-_input
2025-04-17 17:02:01,396 - 31692189696 - node.py-node:167 - INFO: Begin to run Node-read_video_search_csv-0
2025-04-17 17:02:01,397 - 31709016064 - node.py-node:167 - INFO: Begin to run Node-video-text-embedding/clip4clip-1
2025-04-17 17:02:01,398 - 31725842432 - node.py-node:167 - INFO: Begin to run Node-ann-search/milvus-client-2
2025-04-17 17:02:01,398 - 31135330304 - node.py-node:167 - INFO: Begin to run Node-lambda-3
2025-04-17 17:02:01,398 - 31742668800 - node.py-node:167 - INFO: Begin to run Node-_output


Finished loading data into msrvtt_vid_hyp_tun_12
Searching data in msrvtt_vid_hyp_tun_4


2025-04-17 17:03:13,745 - 16898895872 - node.py-node:167 - INFO: Begin to run Node-_input
2025-04-17 17:03:13,745 - 16990040064 - node.py-node:167 - INFO: Begin to run Node-read_video_search_csv-0
2025-04-17 17:03:13,745 - 17096011776 - node.py-node:167 - INFO: Begin to run Node-video-text-embedding/clip4clip-1
2025-04-17 17:03:13,746 - 17134809088 - node.py-node:167 - INFO: Begin to run Node-ann-search/milvus-client-2
2025-04-17 17:03:13,746 - 16898895872 - node.py-node:167 - INFO: Begin to run Node-lambda-3
2025-04-17 17:03:13,746 - 18203308032 - node.py-node:167 - INFO: Begin to run Node-_output


Finished searching data in msrvtt_vid_hyp_tun_4
Searching data in msrvtt_vid_hyp_tun_6


2025-04-17 17:04:26,206 - 18220134400 - node.py-node:167 - INFO: Begin to run Node-_input
2025-04-17 17:04:26,207 - 18236960768 - node.py-node:167 - INFO: Begin to run Node-read_video_search_csv-0
2025-04-17 17:04:26,207 - 18687864832 - node.py-node:167 - INFO: Begin to run Node-video-text-embedding/clip4clip-1
2025-04-17 17:04:26,207 - 20599304192 - node.py-node:167 - INFO: Begin to run Node-ann-search/milvus-client-2
2025-04-17 17:04:26,208 - 18220134400 - node.py-node:167 - INFO: Begin to run Node-lambda-3
2025-04-17 17:04:26,208 - 20697870336 - node.py-node:167 - INFO: Begin to run Node-_output


Finished searching data in msrvtt_vid_hyp_tun_6
Searching data in msrvtt_vid_hyp_tun_8


2025-04-17 17:11:56,785 - 20845719552 - node.py-node:167 - INFO: Begin to run Node-_input
2025-04-17 17:11:56,785 - 25988034560 - node.py-node:167 - INFO: Begin to run Node-read_video_search_csv-0
2025-04-17 17:11:56,786 - 26004860928 - node.py-node:167 - INFO: Begin to run Node-video-text-embedding/clip4clip-1
2025-04-17 17:11:56,786 - 26088796160 - node.py-node:167 - INFO: Begin to run Node-ann-search/milvus-client-2
2025-04-17 17:11:56,787 - 26105622528 - node.py-node:167 - INFO: Begin to run Node-lambda-3
2025-04-17 17:11:56,787 - 26122448896 - node.py-node:167 - INFO: Begin to run Node-_output


Finished searching data in msrvtt_vid_hyp_tun_8
Searching data in msrvtt_vid_hyp_tun_10


2025-04-17 17:13:10,464 - 26139275264 - node.py-node:167 - INFO: Begin to run Node-_input
2025-04-17 17:13:10,464 - 26349137920 - node.py-node:167 - INFO: Begin to run Node-read_video_search_csv-0
2025-04-17 17:13:10,464 - 26365964288 - node.py-node:167 - INFO: Begin to run Node-video-text-embedding/clip4clip-1
2025-04-17 17:13:10,465 - 26382790656 - node.py-node:167 - INFO: Begin to run Node-ann-search/milvus-client-2
2025-04-17 17:13:10,465 - 26139275264 - node.py-node:167 - INFO: Begin to run Node-lambda-3
2025-04-17 17:13:10,465 - 26399617024 - node.py-node:167 - INFO: Begin to run Node-_output


Finished searching data in msrvtt_vid_hyp_tun_10
Searching data in msrvtt_vid_hyp_tun_12
Finished searching data in msrvtt_vid_hyp_tun_12


In [46]:
# Store every experiment's result frame under query_results/ for further evaluation.
# to_csv does not create missing directories, so make sure the folder exists first.
os.makedirs("query_results", exist_ok=True)

for uv, search_results_df in experiment_query_results.items():
    search_results_df.to_csv(f"query_results/c4c_hyp_tuning_uv_{uv}.csv", index=False)
    print(f"Saved search results for {uv} to CSV.")

Saved search results for 4 to CSV.
Saved search results for 6 to CSV.
Saved search results for 8 to CSV.
Saved search results for 10 to CSV.
Saved search results for 12 to CSV.


In [47]:
# Preview eval results in a table: one row per frame-count setting.
# NOTE(review): numpy is imported as `np` in this notebook, so a NameError for
# `np` raised by this cell presumably originates inside helpers/eval_utils —
# confirm that module imports numpy itself.
scores = []
for uv, search_results_df in experiment_query_results.items():
    # The per-experiment result gets its own name; assigning it to `scores`
    # would replace the accumulator list on every iteration.
    uv_scores = my_eval_utils.get_all_eval_scores(search_results_df)
    scores.append((uv, uv_scores))

# Built once from the collected records rather than grown row by row.
results_df = pd.DataFrame(scores, columns=['uts_value', 'eval_scores'])
results_df

NameError: name 'np' is not defined

In [48]:
# NOTE(review): `search_results_df` is not assigned in this cell. It is the
# loop variable left over from the earlier
# `for uv, search_results_df in experiment_query_results.items()` loops, so
# which experiment is shown depends on where the last of those loops stopped.
search_results_df

Unnamed: 0,rel_video_id,query,top1,top5,top10,ground_truth
0,video7579,a girl wearing red top and black trouser is pu...,"[[7579, 1.4419159889221191]]","[[7579, 1.4419159889221191], [9969, 1.45477879...","[[7579, 1.4419159889221191], [9969, 1.45477879...",7579
1,video7725,young people sit around the edges of a room cl...,"[[7725, 1.3929578065872192]]","[[7725, 1.3929578065872192], [8339, 1.46684432...","[[7725, 1.3929578065872192], [8339, 1.46684432...",7725
2,video9258,a person is using a phone,"[[9258, 1.4097641706466675]]","[[9258, 1.4097641706466675], [9257, 1.44244837...","[[9258, 1.4097641706466675], [9257, 1.44244837...",9258
3,video7365,cartoon people are eating at a restaurant,"[[7365, 1.4170596599578857]]","[[7365, 1.4170596599578857], [9537, 1.45860767...","[[7365, 1.4170596599578857], [9537, 1.45860767...",7365
4,video8068,a woman on a couch talks to a a man,"[[8068, 1.4879887104034424]]","[[8068, 1.4879887104034424], [9919, 1.50211000...","[[8068, 1.4879887104034424], [9919, 1.50211000...",8068
...,...,...,...,...,...,...
995,video7034,man in black shirt is holding a baby upside do...,"[[9320, 1.50126314163208]]","[[9320, 1.50126314163208], [9404, 1.5271823406...","[[9320, 1.50126314163208], [9404, 1.5271823406...",7034
996,video7568,the queen of england is seen walking with an e...,"[[7568, 1.289296269416809]]","[[7568, 1.289296269416809], [7116, 1.401354074...","[[7568, 1.289296269416809], [7116, 1.401354074...",7568
997,video7979,people talking about a fight,"[[7979, 1.4600741863250732]]","[[7979, 1.4600741863250732], [7211, 1.48817718...","[[7979, 1.4600741863250732], [7211, 1.48817718...",7979
998,video7356,a vehicle with details on what comes with it b...,"[[7356, 1.3331942558288574]]","[[7356, 1.3331942558288574], [9358, 1.48054170...","[[7356, 1.3331942558288574], [9358, 1.48054170...",7356


In [49]:
# NOTE(review): evaluates whichever frame the leaked loop variable
# `search_results_df` currently refers to. This notebook imports numpy as `np`,
# so the NameError for `np` presumably comes from inside helpers/eval_utils —
# confirm that module imports numpy itself.
my_eval_utils.get_all_eval_scores(search_results_df)

NameError: name 'np' is not defined