In [1]:
!pip install twelvelabs

Defaulting to user installation because normal site-packages is not writeable


In [35]:
# Install the Twelve Labs SDK
# !pip3 install twelvelabs
# !pip install ffmpeg-python
# !pip install pandas

import os
from twelvelabs import TwelveLabs
from twelvelabs.models.task import Task
import pandas as pd
import ffmpeg
import hashlib
import re
from pathlib import Path
import csv
import shutil
from dotenv import load_dotenv


load_dotenv()


API_KEY = "tlk_0QGAQCF0RSEGVT2GZTM1F1RJJ887"
# INDEX_NAME = "security"
# VIDEO_PATH = "Sequence_08.mp4"  # Path to the video file
# VIDEO_PATH = "NA.mp4"  # Path to the video file

# Define the search query
# QUERY_TEXT = "a man jumping the fence only"

def generate_data(VIDEO_PATH, QUERY_TEXT,INDEX_NAME, VIDEO_UPLOAD = 0, DELETE = True, API_KEY=API_KEY, TOPK = 5):
    if DELETE:  
        if os.path.exists("output"):
            shutil.rmtree("output")
            print(f"'{"output"}' has been removed.")
        else:
            print(f"'{"output"}' does not exist.")
        os.mkdir("output")
    
    # Initialize the client with your API key
    client = TwelveLabs(api_key=API_KEY)
    
    # index access
    
    # Step 1: Search for the existing index by name
    found_index = None
    try:
        search_results = client.index.list()  # Fetch the list of indexes
    except Exception as e:
        print(f"Error retrieving indexes: {e}")
        search_results = []  # Ensure search_results is defined, even if retrieval fails
    
    for idx in search_results.root:  # Assuming search_results contains the list of indexes
        if idx.name == INDEX_NAME:
            found_index = idx
            break
    
    # Step 2: If the index exists, store its ID
    if found_index:
        print(f"Index already exists: id={found_index.id}, name={found_index.name}")
        index_id = found_index.id
    
    # Step 3: If the index does not exist, create a new one
    else:
        print(f"Index with name '{INDEX_NAME}' not found. Creating a new index...")
    
        try:
            index = client.index.create(
                name=INDEX_NAME,  # Use the desired name for the index
                models=[
                    {
                        "name": "marengo2.7",  # Engine for video understanding
                        "options": ["visual", "audio"],  # Options for visual, conversation, and text
                    }
                ],
                addons=["thumbnail"],  # Optional thumbnail addon
            )
            print(f"Created new index: id={index.id} name={index.name}")
            index_id = index.id  # Store the new index ID
    
        except Exception as e:
            print(f"Error during index creation: {e}")
    
    # Now you have the index ID stored in `index_id`, whether it was found or created
    print(f"Using index ID: {index_id}")
    
    
    # Step 2: Upload the video
    
    if VIDEO_UPLOAD:
        try:
            print(f"Uploading {VIDEO_PATH}") 
            task = client.task.create(index_id=index_id, file=VIDEO_PATH, language="en")  # Upload the video
            print(f"Created task: id={task.id}")
            
            # Monitor the upload and indexing process
            def on_task_update(task: Task):
                print(f"  Status={task.status}")
        
            task.wait_for_done(sleep_interval=50, callback=on_task_update)  # Wait until the task is done
            
            if task.status != "ready":
                raise RuntimeError(f"Indexing failed with status {task.status}")
            
            print(f"Uploaded {VIDEO_PATH}. The unique identifier of your video is {task.video_id}.")
            
        except Exception as e:
            print(f"Error during video upload or processing: {e}")
    
    # HELPER
    
    def normalize_query(query):
        # Remove special characters and convert text to lowercase
        return re.sub(r'\W+', '', query.lower())
    
    def get_folder_name_from_query(query, folder_base=""):
        # Normalize the query
        normalized_query = normalize_query(query)
        
        # Create a hash from the normalized query
        hash_object = hashlib.md5(normalized_query.encode())  # Using MD5 hash (you can also use SHA256)
        folder_hash = hash_object.hexdigest()[:8]  # Using first 8 characters of the hash
    
        # Construct folder path using the folder base and hash
        folder_path = Path(folder_base) / folder_hash
        return folder_path, folder_hash
    
    def map_query_to_csv(query, folder_path, csv_file="output/query_mapping.csv"):
        # Write the query and folder path to the CSV file
        with open(csv_file, mode='a', newline='') as file:
            writer = csv.writer(file)
            writer.writerow([query, folder_path])
        print("done")
    
    FOLDER_NAME, FOLDER_HASH = get_folder_name_from_query(QUERY_TEXT)
    map_query_to_csv(QUERY_TEXT, FOLDER_NAME, csv_file="output/query_mapping.csv")
    if not os.path.exists(Path("output") / FOLDER_NAME):
        os.makedirs(Path("output") / FOLDER_NAME)
    # List to hold all results
    results = []
    
    try:
        # Step 1: Perform the search
        search_results = client.search.query(
        index_id=index_id, 
        query_text=QUERY_TEXT,
        options=["visual", "audio"])
    
        for clip in search_results.data:
            print(f"Video ID: {clip.video_id}, Confidence: {clip.confidence}, Score: {clip.score}")
        
        idx = 0
        # Step 2: Process each page of results
        def collect_results(page, idx):
            i = 0
            for clip in page:
                # Append each result to the list as a dictionary
                results.append({
                    'index_clip': str(FOLDER_NAME) + "_" + str(idx)+ "_" + str(i),
                    'video_id': clip.video_id,
                    'score': clip.score,
                    'start': clip.start,
                    'end': clip.end,
                    'confidence': clip.confidence,
                    'thumbnail_url': clip.thumbnail_url
                })
                i += 1
    
        # Step 3: Collect the results from the first page
        collect_results(search_results.data, idx)
        idx += 1
        
        # Step 4: Handle pagination
        while True:
            try:
                collect_results(next(search_results), idx)  # Get the next page of results
                idx += 1
            except StopIteration:
                break  # Exit loop when there are no more pages
    
        # Step 5: Convert the results to a pandas DataFrame
        df = pd.DataFrame(results)
        
        # Step 6: Save the DataFrame to a CSV file
        df.to_csv(f"output/{FOLDER_NAME}/search_results.csv", index=False)
        print(f"Results have been saved to output/{FOLDER_NAME}/search_results.csv")
    
    except Exception as e:
        print(f"Error during search: {e}")
    
    
    
    # Load the CSV file
    csv_file = f"output/{FOLDER_NAME}/search_results.csv"  # Replace with the path to your CSV file
    data = pd.read_csv(csv_file)
    
    # # Define the input video file path (constant for all clips)
    # input_video = VIDEO_PATH # Replace with your video file path
    
    # # Loop through the CSV rows and extract clips
    # for index, row in data.iterrows():
    #     index_clip = row['index_clip']  # Use index_clip from the CSV
    #     start_time = row['start']
    #     end_time = row['end']
        
    #     # Dynamic output file name using index_clip from the CSV
    #     output_clip = f"output/{FOLDER_NAME}/{index_clip}.mp4"
    
    #     # Use FFmpeg to extract the subclip
    #     try:
    #         ffmpeg.input(VIDEO_PATH, ss=start_time, t=end_time - start_time).output(output_clip).run()
    #         print(f"Clip saved to {output_clip}")
    #     except ffmpeg.Error as e:
    #         print(f"Error processing clip {index}: {e}")

    return FOLDER_HASHdfr
# generate_data(API_KEY, INDEX_NAME, VIDEO_PATH, QUERY_TEXT, 1, 0, 5)

In [41]:
generate_data(API_KEY = API_KEY, INDEX_NAME = "nsec3", VIDEO_PATH = "./manf.mp4", QUERY_TEXT = "a boy in green shirt drinking water from a bottle", DELETE = 1, VIDEO_UPLOAD = 0, TOPK = 5)

'output' has been removed.
Index already exists: id=6795b89781c61d7813698eef, name=nsec3
Using index ID: 6795b89781c61d7813698eef
done
Video ID: 6795b915f21362a14566e310, Confidence: high, Score: 84.2
Video ID: 6795b915f21362a14566e310, Confidence: high, Score: 83.76
Video ID: 6795b915f21362a14566e310, Confidence: medium, Score: 80.39
Video ID: 6795b94bf21362a14566e311, Confidence: medium, Score: 78.34
Video ID: 6795b8a5f21362a14566e30f, Confidence: medium, Score: 78.33
Video ID: 6795b94bf21362a14566e311, Confidence: medium, Score: 76.12
Video ID: 6795b8a5f21362a14566e30f, Confidence: medium, Score: 74.44
Video ID: 6795b8dfcf67133816b2e54e, Confidence: medium, Score: 74.09
Video ID: 6795b8dfcf67133816b2e54e, Confidence: low, Score: 71.98
Results have been saved to output/79f4b420/search_results.csv


NameError: name 'FOLDER_HASHdfr' is not defined

In [9]:
# Load the CSV fil
csv_file = f"output/0eae8ceb/search_results.csv"  # Replace with the path to your CSV file
data = pd.read_csv(csv_file)
data['confidence'] = pd.to_numeric(data['score'], errors='coerce')

# Filter out NaN values if necessary
# data = data.dropna(subset=['confidence'])


if (data['score'] > 75).any():
    print(True)
    # return True
else:
    print(False)
    # return False

True


In [8]:
data

Unnamed: 0,index_clip,video_id,score,start,end,confidence,thumbnail_url
0,0eae8ceb_0_0,67956c68cf67133816b2e527,84.54,0.0,19.646,,https://project-one-thumbnail.s3.us-west-2.ama...
1,0eae8ceb_0_1,67956c30f21362a14566e2e6,84.32,0.0,29.0,,https://project-one-thumbnail.s3.us-west-2.ama...
2,0eae8ceb_0_2,67956cb6f21362a14566e2e7,83.73,0.0,15.7185,,https://project-one-thumbnail.s3.us-west-2.ama...
3,0eae8ceb_0_3,67956c68cf67133816b2e527,83.69,19.646,29.0,,https://project-one-thumbnail.s3.us-west-2.ama...
4,0eae8ceb_0_4,67956ceff21362a14566e2e8,83.56,0.0,9.823,,https://project-one-thumbnail.s3.us-west-2.ama...
5,0eae8ceb_0_5,67956ceff21362a14566e2e8,83.44,19.646,29.0,,https://project-one-thumbnail.s3.us-west-2.ama...
6,0eae8ceb_0_6,67956cb6f21362a14566e2e7,83.4,15.7185,31.0,,https://project-one-thumbnail.s3.us-west-2.ama...
7,0eae8ceb_0_7,67956ceff21362a14566e2e8,83.2,9.823,19.646,,https://project-one-thumbnail.s3.us-west-2.ama...


In [14]:
generate_data(API_KEY = API_KEY, INDEX_NAME = "security1", VIDEO_PATH = "./manf.mp4", QUERY_TEXT = QUERY_TEXT, DELETE = 1, VIDEO_UPLOAD = , TOPK = 5)

'output' has been removed.
Index already exists: id=67951381246c42594d6919c7, name=security1
Using index ID: 67951381246c42594d6919c7
done
Results have been saved to output/dd0a04c2/search_results.csv


ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enab

Clip saved to output/dd0a04c2/dd0a04c2_0_0.mp4


[libx264 @ 0x5b924d7dd440] using SAR=1/1
[libx264 @ 0x5b924d7dd440] using cpu capabilities: MMX2 SSE2Fast SSSE3 SSE4.2 AVX FMA3 BMI2 AVX2
[libx264 @ 0x5b924d7dd440] profile High, level 4.2, 4:2:0, 8-bit
[libx264 @ 0x5b924d7dd440] 264 - core 163 r3060 5db6aa6 - H.264/MPEG-4 AVC codec - Copyleft 2003-2021 - http://www.videolan.org/x264.html - options: cabac=1 ref=3 deblock=1:0:0 analyse=0x3:0x113 me=hex subme=7 psy=1 psy_rd=1.00:0.00 mixed_ref=1 me_range=16 chroma_me=1 trellis=1 8x8dct=1 cqm=0 deadzone=21,11 fast_pskip=1 chroma_qp_offset=-2 threads=24 lookahead_threads=4 sliced_threads=0 nr=0 decimate=1 interlaced=0 bluray_compat=0 constrained_intra=0 bframes=3 b_pyramid=2 b_adapt=1 b_bias=0 direct=1 weightb=1 open_gop=0 weightp=2 keyint=250 keyint_min=25 scenecut=40 intra_refresh=0 rc_lookahead=40 rc=crf mbtree=1 crf=23.0 qcomp=0.60 qpmin=0 qpmax=69 qpstep=4 ip_ratio=1.40 aq=1:1.00
Output #0, mp4, to 'output/dd0a04c2/dd0a04c2_0_1.mp4':
  Metadata:
    minor_version   : 512
    major_bra

Clip saved to output/dd0a04c2/dd0a04c2_0_1.mp4


[libx264 @ 0x5d008c5d4440] using SAR=1/1
[libx264 @ 0x5d008c5d4440] using cpu capabilities: MMX2 SSE2Fast SSSE3 SSE4.2 AVX FMA3 BMI2 AVX2
[libx264 @ 0x5d008c5d4440] profile High, level 4.2, 4:2:0, 8-bit
[libx264 @ 0x5d008c5d4440] 264 - core 163 r3060 5db6aa6 - H.264/MPEG-4 AVC codec - Copyleft 2003-2021 - http://www.videolan.org/x264.html - options: cabac=1 ref=3 deblock=1:0:0 analyse=0x3:0x113 me=hex subme=7 psy=1 psy_rd=1.00:0.00 mixed_ref=1 me_range=16 chroma_me=1 trellis=1 8x8dct=1 cqm=0 deadzone=21,11 fast_pskip=1 chroma_qp_offset=-2 threads=24 lookahead_threads=4 sliced_threads=0 nr=0 decimate=1 interlaced=0 bluray_compat=0 constrained_intra=0 bframes=3 b_pyramid=2 b_adapt=1 b_bias=0 direct=1 weightb=1 open_gop=0 weightp=2 keyint=250 keyint_min=25 scenecut=40 intra_refresh=0 rc_lookahead=40 rc=crf mbtree=1 crf=23.0 qcomp=0.60 qpmin=0 qpmax=69 qpstep=4 ip_ratio=1.40 aq=1:1.00
Output #0, mp4, to 'output/dd0a04c2/dd0a04c2_0_2.mp4':
  Metadata:
    minor_version   : 512
    major_bra

Clip saved to output/dd0a04c2/dd0a04c2_0_2.mp4


[libx264 @ 0x63db98ed8440] using SAR=1/1
[libx264 @ 0x63db98ed8440] using cpu capabilities: MMX2 SSE2Fast SSSE3 SSE4.2 AVX FMA3 BMI2 AVX2
[libx264 @ 0x63db98ed8440] profile High, level 4.2, 4:2:0, 8-bit
[libx264 @ 0x63db98ed8440] 264 - core 163 r3060 5db6aa6 - H.264/MPEG-4 AVC codec - Copyleft 2003-2021 - http://www.videolan.org/x264.html - options: cabac=1 ref=3 deblock=1:0:0 analyse=0x3:0x113 me=hex subme=7 psy=1 psy_rd=1.00:0.00 mixed_ref=1 me_range=16 chroma_me=1 trellis=1 8x8dct=1 cqm=0 deadzone=21,11 fast_pskip=1 chroma_qp_offset=-2 threads=24 lookahead_threads=4 sliced_threads=0 nr=0 decimate=1 interlaced=0 bluray_compat=0 constrained_intra=0 bframes=3 b_pyramid=2 b_adapt=1 b_bias=0 direct=1 weightb=1 open_gop=0 weightp=2 keyint=250 keyint_min=25 scenecut=40 intra_refresh=0 rc_lookahead=40 rc=crf mbtree=1 crf=23.0 qcomp=0.60 qpmin=0 qpmax=69 qpstep=4 ip_ratio=1.40 aq=1:1.00
Output #0, mp4, to 'output/dd0a04c2/dd0a04c2_0_3.mp4':
  Metadata:
    minor_version   : 512
    major_bra

Clip saved to output/dd0a04c2/dd0a04c2_0_3.mp4


[libx264 @ 0x5b7b2312c440] using SAR=1/1
[libx264 @ 0x5b7b2312c440] using cpu capabilities: MMX2 SSE2Fast SSSE3 SSE4.2 AVX FMA3 BMI2 AVX2
[libx264 @ 0x5b7b2312c440] profile High, level 4.2, 4:2:0, 8-bit
[libx264 @ 0x5b7b2312c440] 264 - core 163 r3060 5db6aa6 - H.264/MPEG-4 AVC codec - Copyleft 2003-2021 - http://www.videolan.org/x264.html - options: cabac=1 ref=3 deblock=1:0:0 analyse=0x3:0x113 me=hex subme=7 psy=1 psy_rd=1.00:0.00 mixed_ref=1 me_range=16 chroma_me=1 trellis=1 8x8dct=1 cqm=0 deadzone=21,11 fast_pskip=1 chroma_qp_offset=-2 threads=24 lookahead_threads=4 sliced_threads=0 nr=0 decimate=1 interlaced=0 bluray_compat=0 constrained_intra=0 bframes=3 b_pyramid=2 b_adapt=1 b_bias=0 direct=1 weightb=1 open_gop=0 weightp=2 keyint=250 keyint_min=25 scenecut=40 intra_refresh=0 rc_lookahead=40 rc=crf mbtree=1 crf=23.0 qcomp=0.60 qpmin=0 qpmax=69 qpstep=4 ip_ratio=1.40 aq=1:1.00
Output #0, mp4, to 'output/dd0a04c2/dd0a04c2_0_4.mp4':
  Metadata:
    minor_version   : 512
    major_bra

Clip saved to output/dd0a04c2/dd0a04c2_0_4.mp4


frame= 3585 fps=214 q=-1.0 Lsize=    5480kB time=00:00:59.72 bitrate= 751.8kbits/s dup=3 drop=0 speed=3.57x    
video:5368kB audio:15kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 1.808036%
[libx264 @ 0x5b7b2312c440] frame I:15    Avg QP:18.50  size:150989
[libx264 @ 0x5b7b2312c440] frame P:903   Avg QP:20.11  size:  3022
[libx264 @ 0x5b7b2312c440] frame B:2667  Avg QP:19.62  size:   188
[libx264 @ 0x5b7b2312c440] consecutive B-frames:  0.8%  0.1%  0.2% 99.0%
[libx264 @ 0x5b7b2312c440] mb I  I16..4: 12.2% 66.2% 21.6%
[libx264 @ 0x5b7b2312c440] mb P  I16..4:  0.8%  1.0%  0.1%  P16..4:  5.0%  1.0%  0.5%  0.0%  0.0%    skip:91.6%
[libx264 @ 0x5b7b2312c440] mb B  I16..4:  0.0%  0.0%  0.0%  B16..8:  2.0%  0.0%  0.0%  direct: 0.0%  skip:97.9%  L0:36.4% L1:63.5% BI: 0.2%
[libx264 @ 0x5b7b2312c440] 8x8 transform intra:57.0% inter:78.6%
[libx264 @ 0x5b7b2312c440] coded y,uvDC,uvAC intra: 56.8% 44.8% 14.5% inter: 0.5% 0.9% 0.0%
[libx264 @ 0x5b7b2312c440] i16 v,h,dc,p: 24% 

'dd0a04c2'

In [10]:
import pandas as pd
df = pd.read_csv("output/325f99ab/search_results.csv")

In [11]:
df

Unnamed: 0,index_clip,video_id,score,start,end,confidence,thumbnail_url
0,325f99ab_0_0,679513d4f21362a14566e29d,84.43,59.766666,67.682443,high,https://project-one-thumbnail.s3.us-west-2.ama...
1,325f99ab_0_1,679513d4f21362a14566e29d,84.37,75.59822,83.513996,high,https://project-one-thumbnail.s3.us-west-2.ama...
2,325f99ab_0_2,679513d4f21362a14566e29d,83.96,83.513996,91.0,high,https://project-one-thumbnail.s3.us-west-2.ama...
3,325f99ab_0_3,679513d4f21362a14566e29d,83.93,67.682443,75.59822,high,https://project-one-thumbnail.s3.us-west-2.ama...
4,325f99ab_0_4,679513d4f21362a14566e29d,83.5,0.0,59.733333,high,https://project-one-thumbnail.s3.us-west-2.ama...
