
# CLIP Classify Content of Video

---
[![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://github.com/roboflow/notebooks/blob/main/notebooks/vector-analysis-with-sklearn-and-bokeh.ipynb)
[![Roboflow](https://raw.githubusercontent.com/roboflow-ai/notebooks/main/assets/badges/roboflow-blogpost.svg)](https://blog.roboflow.com/vector-analysis)
[![GitHub](https://badges.aleen42.com/src/github.svg)](https://github.com/roboflow/notebooks)

Click the Open in Colab button to run the cookbook on Google Colab.

**Let's begin!**

# Install required packages
In this cookbook, we'll leverage two Python packages - `opencv` and `supervision`

In [None]:
!pip install supervision opencv-python

## Imports & Configure Roboflow Inference Server

In [25]:
import requests
import base64
from PIL import Image
from io import BytesIO
import os
import supervision as sv
from tqdm import tqdm
from supervision import get_video_frames_generator
import time

INFERENCE_ENDPOINT = "https://infer.roboflow.com"
API_KEY = "YOUR_API_KEY"
VIDEO = "VIDEO_PATH"



## Prompt List for CLIP similarity function

In [None]:
#Prompt list to evaluate similarity between each image and each prompt. If something else is selected, then we ignore the caption
#change this to your desired prompt list
prompt_list = [['action video game shooting xbox','Drake rapper music','soccer game ball',
                'marvel combic book','beyonce','Church pope praying',
                'Mcdonalds French Fries',"something else"]]

## CLIP Endpoint Compare Frame & Prompt List Similarity

In [26]:
def classify_image(image: str, prompt: str) -> dict:
    
    image_data = Image.fromarray(image)

    buffer = BytesIO()
    image_data.save(buffer, format="JPEG")
    image_data = base64.b64encode(buffer.getvalue()).decode("utf-8")

    payload = {
        "api_key": API_KEY,
        "subject": {
            "type": "base64",
            "value": image_data
        },
        "prompt": prompt,
    }

    data = requests.post(INFERENCE_ENDPOINT + "/clip/compare?api_key=" + API_KEY, json=payload)

    response = data.json()
    #print(response["similarity"])
    sim = response["similarity"]

    highest_prediction = 0
    highest_prediction_index = 0

    for i, prediction in enumerate(response["similarity"]):
        if prediction > highest_prediction:
            highest_prediction = prediction
            highest_prediction_index = i

    return prompt[highest_prediction_index], sim[highest_prediction_index]

# Process Video & Return Most Similar Prompt to Frame

In [None]:

def process_video_frames(video_path, prompt_list, total_frames=160, total_seconds=80, stride_length=30,max_retries):
    if not os.path.exists(video_path):
        print(f"The specified video file '{video_path}' does not exist.")
        return

    frames_per_second = total_frames / total_seconds
    frame_dict = {}

    for frame_index, frame in enumerate(sv.get_video_frames_generator(source_path=video_path, stride=stride_length, start=0)):
        frame_second = frame_index * (1 / frames_per_second)
        frame_key = f"Frame {frame_index}: {frame_second:.2f} seconds"
        frame_dict[frame_key] = []

        print(frame_key)
        retries = 0

        for prompt in prompt_list:
            try: 
                label, similarity = classify_image(frame)
                if label != "something else":
                    print('label found')
                    frame_dict[frame_key].append({label: similarity})
                    print('\n')

            except Exception as e:
                retries += 1
                print(f"Error: {e}")
                print(f"Retrying... (Attempt {retries}/{max_retries})")

                if retries >= max_retries:
                    print("Max retries exceeded. Skipping frame.")
                    break

    return frame_dict

# Example usage:
max_retries = 4
prompt_list = prompt_list
frame_results = process_video_frames(VIDEO, prompt_list,max_retries)


In [41]:

#print(os.path.exists(VIDEO))

max_retries = 4

total_frames = 160
total_seconds = 80
stride_length = 30
frames_per_second = total_frames / total_seconds
frame_dict = {}
num_frames = 0
for frame_index,frame in enumerate(sv.get_video_frames_generator(source_path=VIDEO,stride=stride_length,start =0)):
    frame_second = frame_index * (1 / frames_per_second)
    frame_dict[f"Frame {frame_index}: {frame_second:.2f} seconds"] = []
    if frame_index < 200:
        print(f"Frame {frame_index}: {frame_second:.2f} seconds")
        retries = 0
        for prompt in prompt_list:
            try: 
                label,similarity = classify_image(frame)
                if label != "something else":
                    print('label found')
                    frame_dict[f"Frame {frame_index}: {frame_second:.2f} seconds"].append({label:similarity})
                    #frame_dict[f"Frame {frame_index}: {frame_second:.2f} seconds"].append(similarity)
                    
                    print('\n')
            except Exception as e:
                retries+=1
                print(f"Error: {e}")
                print(f"Retrying... (Attempt {retries + 1}/{max_retries})")
                retries += 1
                time.sleep(1)  # Wait for a short period before retrying

Frame 0: 0.00 seconds
label found


Frame 1: 0.50 seconds
label found


Frame 2: 1.00 seconds
label found


Frame 3: 1.50 seconds
label found


Frame 4: 2.00 seconds
label found


Frame 5: 2.50 seconds
label found


Frame 6: 3.00 seconds
label found


Frame 7: 3.50 seconds
label found


Frame 8: 4.00 seconds
label found


Frame 9: 4.50 seconds
label found


Frame 10: 5.00 seconds
label found


Frame 11: 5.50 seconds
label found


Frame 12: 6.00 seconds
label found


Frame 13: 6.50 seconds
label found


Frame 14: 7.00 seconds
label found


Frame 15: 7.50 seconds
label found


Frame 16: 8.00 seconds
label found


Frame 17: 8.50 seconds
label found


Frame 18: 9.00 seconds
label found


Frame 19: 9.50 seconds
label found


Frame 20: 10.00 seconds
label found


Frame 21: 10.50 seconds
label found


Frame 22: 11.00 seconds
label found


Frame 23: 11.50 seconds
label found


Frame 24: 12.00 seconds
label found


Frame 25: 12.50 seconds
label found


Frame 26: 13.00 seconds
label found


Fram

In [42]:
frame_dict

{'Frame 0: 0.00 seconds': [{'action video game shooting xbox': 0.251544279116603}],
 'Frame 1: 0.50 seconds': [{'action video game shooting xbox': 0.2567858361741459}],
 'Frame 2: 1.00 seconds': [{'action video game shooting xbox': 0.264930826243032}],
 'Frame 3: 1.50 seconds': [{'action video game shooting xbox': 0.26455160526266686}],
 'Frame 4: 2.00 seconds': [{'action video game shooting xbox': 0.2620280756204121}],
 'Frame 5: 2.50 seconds': [{'action video game shooting xbox': 0.25951293189309643}],
 'Frame 6: 3.00 seconds': [{'action video game shooting xbox': 0.27337254129373756}],
 'Frame 7: 3.50 seconds': [{'action video game shooting xbox': 0.27337921634868095}],
 'Frame 8: 4.00 seconds': [{'action video game shooting xbox': 0.26934443561608745}],
 'Frame 9: 4.50 seconds': [{'action video game shooting xbox': 0.28382190441216126}],
 'Frame 10: 5.00 seconds': [{'action video game shooting xbox': 0.25426001338438914}],
 'Frame 11: 5.50 seconds': [{'action video game shooting xb

In [46]:
# Flatten the nested dictionary
data = frame_dict
# Define the threshold
threshold = 0.22

# Filter out key-value pairs below the threshold for each frame
filtered_data = [
    {
        frame: [
            {key: value}
            for item in items
            for key, value in item.items()
            if value > threshold
        ]
    }
    for frame, items in data.items()
]

print(filtered_data)

[{'Frame 0: 0.00 seconds': [{'action video game shooting xbox': 0.251544279116603}]}, {'Frame 1: 0.50 seconds': [{'action video game shooting xbox': 0.2567858361741459}]}, {'Frame 2: 1.00 seconds': [{'action video game shooting xbox': 0.264930826243032}]}, {'Frame 3: 1.50 seconds': [{'action video game shooting xbox': 0.26455160526266686}]}, {'Frame 4: 2.00 seconds': [{'action video game shooting xbox': 0.2620280756204121}]}, {'Frame 5: 2.50 seconds': [{'action video game shooting xbox': 0.25951293189309643}]}, {'Frame 6: 3.00 seconds': [{'action video game shooting xbox': 0.27337254129373756}]}, {'Frame 7: 3.50 seconds': [{'action video game shooting xbox': 0.27337921634868095}]}, {'Frame 8: 4.00 seconds': [{'action video game shooting xbox': 0.26934443561608745}]}, {'Frame 9: 4.50 seconds': [{'action video game shooting xbox': 0.28382190441216126}]}, {'Frame 10: 5.00 seconds': [{'action video game shooting xbox': 0.25426001338438914}]}, {'Frame 11: 5.50 seconds': [{'action video game

In [44]:
# Specify the filename for the JSON file
import json
filename = 'telly_clip'+str(threshold)+'.json'

# Write the dictionary to the JSON file
with open(filename, 'w') as json_file:
    json.dump(filtered_data, json_file, indent=4)  # The indent parameter is optional for pretty-printing

#print(f'Data has been written to {filename})

## extra

In [9]:

#print(os.path.exists(VIDEO))
import time
max_retries = 4

total_frames = 171
total_seconds = 80
stride_length = 30
frames_per_second = total_frames / total_seconds
frame_dict = {}
num_frames = 0
for frame_index,frame in enumerate(sv.get_video_frames_generator(source_path=VIDEO,stride=stride_length,start =0)):
    frame_second = frame_index * (1 / frames_per_second)
    frame_dict[f"Frame {frame_index}: {frame_second:.2f} seconds"] = []
    if frame_index < 3:
        print(f"Frame {frame_index}: {frame_second:.2f} seconds")
        for prompt in prompt_list:
            try: 
                label,similarity = classify_image(frame)
                if label == prompt[0]:
                    print('label found')
                    frame_dict[f"Frame {frame_index}: {frame_second:.2f} seconds"].append({label:similarity})
                    #frame_dict[f"Frame {frame_index}: {frame_second:.2f} seconds"].append(similarity)
                    
                    print('\n')
            except Exception as e:
                print(f"Error: {e}")
                print(f"Retrying... (Attempt {retries + 1}/{max_retries})")
                retries += 1
                time.sleep(1)  # Wait for a short period before retrying

Frame 0: 0.00 seconds
label found


label found


label found


label found


label found


label found


label found


Frame 1: 0.47 seconds
label found


label found


label found


label found


label found


label found


label found


Frame 2: 0.94 seconds
label found


label found


label found


label found


label found


label found


label found


Frame 3: 1.40 seconds
label found


label found


label found


label found


label found


label found


label found


Frame 4: 1.87 seconds
label found


label found


label found


label found


label found


label found


label found


Frame 5: 2.34 seconds
label found


label found


label found


label found


label found


label found


label found


Frame 6: 2.81 seconds
label found


label found


label found


label found


label found


label found


label found


Frame 7: 3.27 seconds
label found


label found


label found


label found


label found


label found


label found


Frame 8: 3.74 seconds
label found


labe

KeyboardInterrupt: 

In [121]:
frame_dict

{'Frame 0: 0.00 seconds': [{'action video game shooting xbox': 0.251544279116603}],
 'Frame 1: 0.50 seconds': [{'action video game shooting xbox': 0.2567858361741459}],
 'Frame 2: 1.00 seconds': [{'action video game shooting xbox': 0.264930826243032},
  {'youtube video commercial trailer': 0.22509281064762096}],
 'Frame 3: 1.49 seconds': [{'action video game shooting xbox': 0.26455160526266686},
  {'youtube video commercial trailer': 0.22196117136207832}],
 'Frame 4: 1.99 seconds': [{'action video game shooting xbox': 0.2620280756204121},
  {'youtube video commercial trailer': 0.2330172919859395}],
 'Frame 5: 2.49 seconds': [{'action video game shooting xbox': 0.25951293189309643},
  {'youtube video commercial trailer': 0.22166856130776694}],
 'Frame 6: 2.99 seconds': [{'action video game shooting xbox': 0.27337254129373756}],
 'Frame 7: 3.48 seconds': [{'action video game shooting xbox': 0.27337921634868095},
  {'youtube video commercial trailer': 0.23314553655320758}],
 'Frame 8: 3.9

In [255]:
# Flatten the nested dictionary
data = frame_dict
# Define the threshold
threshold = 0.27

# Filter out key-value pairs below the threshold for each frame
filtered_data = [
    {
        frame: [
            {key: value}
            for item in items
            for key, value in item.items()
            if value > threshold
        ]
    }
    for frame, items in data.items()
]

print(filtered_data)

[{'Frame 0: 0.00 seconds': []}, {'Frame 1: 0.50 seconds': []}, {'Frame 2: 1.00 seconds': []}, {'Frame 3: 1.49 seconds': []}, {'Frame 4: 1.99 seconds': []}, {'Frame 5: 2.49 seconds': []}, {'Frame 6: 2.99 seconds': []}, {'Frame 7: 3.48 seconds': []}, {'Frame 8: 3.98 seconds': []}, {'Frame 9: 4.48 seconds': [{'action video game shooting xbox': 0.28382190441216126}]}, {'Frame 10: 4.98 seconds': []}, {'Frame 11: 5.48 seconds': [{'action video game shooting xbox': 0.2857121689011761}]}, {'Frame 12: 5.97 seconds': []}, {'Frame 13: 6.47 seconds': [{'action video game shooting xbox': 0.2838515535424833}]}, {'Frame 14: 6.97 seconds': []}, {'Frame 15: 7.47 seconds': []}, {'Frame 16: 7.96 seconds': []}, {'Frame 17: 8.46 seconds': []}, {'Frame 18: 8.96 seconds': []}, {'Frame 19: 9.46 seconds': []}, {'Frame 20: 9.95 seconds': []}, {'Frame 21: 10.45 seconds': [{'Drake rapper music': 0.2852440045098531}]}, {'Frame 22: 10.95 seconds': [{'Drake rapper music': 0.2986185267688645}]}, {'Frame 23: 11.45 sec

In [256]:
# Specify the filename for the JSON file
filename = 'telly_clip.json'

# Write the dictionary to the JSON file
with open(filename, 'w') as json_file:
    json.dump(filtered_data, json_file, indent=4)  # The indent parameter is optional for pretty-printing

#print(f'Data has been written to {filename})

In [248]:
# Flatten the data
flat_data = []
for _dict in filtered_data:
    frame_name, items = next(iter(_dict.items()))
    item_dict = next(iter(items))
    key, value = next(iter(item_dict.items()))
    flat_data.append({'Frame': frame_name, 'Key': key, 'Value': value})

# Create a DataFrame
ogdf = pd.DataFrame(flat_data)

StopIteration: 

In [242]:
len(filtered_data)

221

In [240]:
ogdf = make_df(filtered_data)

AttributeError: 'list' object has no attribute 'items'

In [243]:
final_dict = frame_dict

In [None]:
final_dict

In [244]:
# Specify the filename for the JSON file
filename = 'final_dict.json'

# Write the dictionary to the JSON file
with open(filename, 'w') as json_file:
    json.dump(frame_dict, json_file, indent=4)  # The indent parameter is optional for pretty-printing

print(f'Data has been written to {filename}')# The indent parameter is optional for pretty-printing


Data has been written to final_dict.json


In [168]:
dff2 = make_df(frame_dict)

In [169]:
dff2.head()

Unnamed: 0,Frame,Values
0,Frame 0: 0.00 seconds,{'action video game shooting xbox': 0.25154427...
1,Frame 1: 0.50 seconds,{'action video game shooting xbox': 0.25678583...
2,Frame 2: 1.00 seconds,{'action video game shooting xbox': 0.26493082...
3,Frame 3: 1.49 seconds,{'action video game shooting xbox': 0.26455160...
4,Frame 4: 1.99 seconds,{'action video game shooting xbox': 0.26202807...


In [182]:
data = {'action video game shooting xbox': 0.2620280756204121}, {'youtube video commercial trailer': 0.2330172919859395}

def filter_dict(data_dict, threshold):
    data_dict = list(data_dict)
    print(data_dict)
    filtered_dict = {key: value for entry in data_dict for key, value in entry.items() if value > threshold}
    return filtered_dict

filtered_data = filter_dict(data, 0.26)
print(filtered_data)

[{'action video game shooting xbox': 0.2620280756204121}, {'youtube video commercial trailer': 0.2330172919859395}]
{'action video game shooting xbox': 0.2620280756204121}


In [180]:
datadf = dff2

In [234]:
import json

# Sample string

# Using json.loads to convert the string into a dictionary

def filter_dict2(data_dict, threshold):
    data_dict = [data_dict]
    data_dict = str(data_dict[0])
    data_dict = json.loads(data_dict.replace("'", "\""))
    threshold = 0.2

    filtered_data_list = [item for item in data_dict if next(iter(item.values())) > threshold]

    
    return filtered_data_list

In [235]:
data_df['Values_filtered'] = data_df['Values'].apply(lambda x: filter_dict2(x, threshold))


AttributeError: 'str' object has no attribute 'values'

In [229]:
data_df

Unnamed: 0,Frame,Values,Values_filtered
0,Frame 0: 0.00 seconds,{'action video game shooting xbox': 0.25154427...,<class 'str'>
1,Frame 1: 0.50 seconds,{'action video game shooting xbox': 0.25678583...,<class 'str'>
2,Frame 2: 1.00 seconds,{'action video game shooting xbox': 0.26493082...,<class 'str'>
3,Frame 3: 1.49 seconds,{'action video game shooting xbox': 0.26455160...,<class 'str'>
4,Frame 4: 1.99 seconds,{'action video game shooting xbox': 0.26202807...,<class 'str'>
...,...,...,...
216,Frame 216: 107.51 seconds,,<class 'str'>
217,Frame 217: 108.01 seconds,{'youtube video commercial trailer': 0.2570667...,<class 'str'>
218,Frame 218: 108.51 seconds,{'youtube video commercial trailer': 0.2538582...,<class 'str'>
219,Frame 219: 109.00 seconds,{'youtube video commercial trailer': 0.2543571...,<class 'str'>


In [148]:
filtered_data

{'Drake rapper music': 0.2852440045098531,
 'youtube video commercial trailer': 0.24491280174877503}

## extra

In [71]:
dff

Unnamed: 0,Frame,Values
0,Frame 0: 0.00 seconds,{'action video game shooting xbox': 0.25154427...
1,Frame 1: 0.50 seconds,
2,Frame 2: 1.00 seconds,
3,Frame 3: 1.49 seconds,
4,Frame 4: 1.99 seconds,
...,...,...
216,Frame 216: 107.51 seconds,
217,Frame 217: 108.01 seconds,
218,Frame 218: 108.51 seconds,
219,Frame 219: 109.00 seconds,


In [45]:
frame_dict

{'Frame 0: 0.00 seconds': ['action video game shooting xbox',
  'music video singing dancing',
  'superhero action film video',
  'beyonce the singer video'],
 'Frame 1: 0.50 seconds': ['action video game shooting xbox',
  'Drake the rapper',
  'music video singing dancing',
  'superhero action film video',
  'beyonce the singer video',
  'Pete Davidson Mr. Beast Celebrity'],
 'Frame 2: 1.00 seconds': ['action video game shooting xbox',
  'music video singing dancing',
  'superhero action film video',
  'beyonce the singer video'],
 'Frame 3: 1.49 seconds': ['action video game shooting xbox',
  'music video singing dancing',
  'superhero action film video',
  'beyonce the singer video',
  'Mcdonalds Fast Food Fries people eating',
  'Church religion holy cross'],
 'Frame 4: 1.99 seconds': ['action video game shooting xbox',
  'music video singing dancing',
  'superhero action film video',
  'beyonce the singer video',
  'Mcdonalds Fast Food Fries people eating'],
 'Frame 5: 2.49 second

In [48]:
#prompt tweaking
#similarity threshold
#frequency

In [40]:

#print(os.path.exists(VIDEO))
import time
total_frames = 160
total_seconds = 110
stride_length = 30
frames_per_second = total_frames / total_seconds
frame_dict = {}
max_retries = 3
#print(os.path.exists(VIDEO))

num_frames = 0
for frame_index,frame in enumerate(sv.get_video_frames_generator(source_path=VIDEO,stride=stride_length,start =0)):
    frame_second = frame_index * (1 / frames_per_second)
    frame_dict[f"Frame {frame_index}: {frame_second:.2f} seconds"] = []
    #if frame_index < 30:
    print(f"Frame {frame_index}: {frame_second:.2f} seconds")
    retries = 0
    for prompt in prompt_list:
        retries = 0
        while retries < max_retries:
            try:
                label = classify_image(frame)
                if label == prompt[0]:
                    print('Label found')
                    frame_dict[f"Frame {frame_index}: {frame_second:.2f} seconds"].append(label)
                    print('\n')
                break  # Break the retry loop if successful
            except Exception as e:
                print(f"Error: {e}")
                print(f"Retrying... (Attempt {retries + 1}/{max_retries})")
                retries += 1
                time.sleep(1)  # Wait for a short period before retrying
print(num_frames)

Frame 0: 0.00 seconds
Frame 1: 0.69 seconds
Frame 2: 1.38 seconds
Frame 3: 2.06 seconds
Frame 4: 2.75 seconds
Frame 5: 3.44 seconds
Frame 6: 4.12 seconds
Frame 7: 4.81 seconds
Frame 8: 5.50 seconds
Frame 9: 6.19 seconds
Frame 10: 6.88 seconds
Frame 11: 7.56 seconds
Frame 12: 8.25 seconds
Frame 13: 8.94 seconds
Frame 14: 9.62 seconds
Frame 15: 10.31 seconds
Frame 16: 11.00 seconds
Frame 17: 11.69 seconds
Frame 18: 12.38 seconds
Frame 19: 13.06 seconds
Frame 20: 13.75 seconds
Frame 21: 14.44 seconds
Frame 22: 15.12 seconds
Frame 23: 15.81 seconds
Frame 24: 16.50 seconds
Frame 25: 17.19 seconds
Frame 26: 17.88 seconds
Frame 27: 18.56 seconds
Frame 28: 19.25 seconds
Frame 29: 19.94 seconds
Frame 30: 20.62 seconds
Frame 31: 21.31 seconds
Frame 32: 22.00 seconds
Frame 33: 22.69 seconds


KeyboardInterrupt: 

In [47]:
import pandas as pd
# Create an empty dataframe
data = frame_dict

# Create lists for columns
frame_column = []
values_column = []

# Loop through the dictionary and populate the columns
for frame, values in data.items():
    frame_column.append(frame)
    values_column.append(', '.join(values))

# Create the dataframe
df = pd.DataFrame({'Frame': frame_column, 'Values': values_column})

df.to_csv('all_frames_similarity.csv')

In [None]:
prompts = ["action video game","not action video game"]


In [34]:
prompts = ["action video game","not action video game"]
active_prompt_list = ["shooting"]
#CLASS_MAP_gun = ["gun","no_gun"]

In [None]:
for in range

In [40]:
frame_category

[10, 20, 30, 40, 50, 60, 70, 80, 90]

In [None]:
_list = ['action video game',"not action video game"]

In [52]:
frame_intervals = list(range(20, 280, 20))
frame_category = list(range(10,110,10))
results = []
stride_length = 30
frame_index = 0
#print(os.path.exists(VIDEO))
num_frames = 0
for frame in enumerate(sv.get_video_frames_generator(source_path=VIDEO,stride=stride_length,start =0)):
    #label = classify_image(frame)
    num_frames +=1
    if num_frames % 20 == 0:
        #print(num_frames)
        #print(frame_category[frame_index])
        #print(frame_intervals[frame_index])
        print('\n')
        #frame_index+=1
      
    
print(num_frames)

28it [00:00, 40.61it/s]





45it [00:01, 44.40it/s]





64it [00:01, 35.92it/s]





88it [00:02, 42.43it/s]





104it [00:02, 46.95it/s]





128it [00:03, 39.80it/s]





150it [00:03, 48.17it/s]





165it [00:04, 54.17it/s]





188it [00:04, 47.75it/s]





204it [00:04, 44.80it/s]





221it [00:06, 36.28it/s]



221





In [None]:
#prompt list of 10. if it gets it, add it to list, and also which frame

In [60]:
prompts = ["Drake","random person"]
#active_prompt_list = ["shooting"]

In [69]:

results = []
stride_length = 30
#print(os.path.exists(VIDEO))

num_frames = 0
for frame in tqdm(sv.get_video_frames_generator(source_path=VIDEO,stride=stride_length)):
    
    num_frames+=1
    #print(num_frames)

    if num_frames >20 and num_frames < 45:
        print(num_frames)
        label = classify_image(frame)
        print(label)
      
        if label == prompts[0]:
            print(label)
            print('LABEL FOUND')
    elif num_frames > 45:
        break
            
    

    #results.append(label)
    #if num_frames < 100:
    #    break
    
video_length = stride_length * num_frames
video_length = video_length / 60
print(num_frames)
print(video_length)

18it [00:00, 32.04it/s]

21
[0.21278525563116968, 0.2276793166656769]
random person
22


22it [00:01,  7.86it/s]

[0.28315378839524374, 0.21910644315955138]
Drake
Drake
LABEL FOUND
23
[0.29448649617632927, 0.20471081431448296]
Drake
Drake
LABEL FOUND
24
[0.2992157873011149, 0.2094470267628897]
Drake
Drake
LABEL FOUND
25


25it [00:03,  4.12it/s]

[0.2833458253830297, 0.20780658053403403]
Drake
Drake
LABEL FOUND
26
[0.2913344143270633, 0.2080881761309545]
Drake
Drake
LABEL FOUND
27


27it [00:04,  3.42it/s]

[0.2919890919500771, 0.19950343714508936]
Drake
Drake
LABEL FOUND
28
[0.23505412146691754, 0.1927602175575251]
Drake
Drake
LABEL FOUND
29


29it [00:05,  2.80it/s]

[0.23679538994858787, 0.19367486559685332]
Drake
Drake
LABEL FOUND
30


30it [00:06,  2.63it/s]

[0.27504372055707405, 0.23258823833517972]
Drake
Drake
LABEL FOUND
31


31it [00:06,  2.35it/s]

[0.2990253635416379, 0.20871301220336244]
Drake
Drake
LABEL FOUND
32


32it [00:07,  2.28it/s]

[0.29776158542736825, 0.21735953040512462]
Drake
Drake
LABEL FOUND
33


33it [00:07,  2.21it/s]

[0.2855282864024514, 0.21475692070945027]
Drake
Drake
LABEL FOUND
34


34it [00:08,  2.04it/s]

[0.2942120824132894, 0.21834203578264808]
Drake
Drake
LABEL FOUND
35


35it [00:08,  2.00it/s]

[0.24045351577678786, 0.19133978491906684]
Drake
Drake
LABEL FOUND
36


36it [00:09,  1.85it/s]

[0.2414150095444465, 0.19092482868280977]
Drake
Drake
LABEL FOUND
37


37it [00:10,  1.91it/s]

[0.2990996180597319, 0.20405968198083194]
Drake
Drake
LABEL FOUND
38


38it [00:10,  1.79it/s]

[0.2824112305631857, 0.20555268214420758]
Drake
Drake
LABEL FOUND
39


39it [00:11,  1.87it/s]

[0.2819788148129171, 0.20643229623932657]
Drake
Drake
LABEL FOUND
40


40it [00:11,  1.77it/s]

[0.27954749907006987, 0.20727710241091546]
Drake
Drake
LABEL FOUND
41


41it [00:12,  1.84it/s]

[0.29461300162382986, 0.20825290506918495]
Drake
Drake
LABEL FOUND
42


42it [00:12,  1.82it/s]

[0.18508140260285572, 0.1975257288368981]
random person
43


43it [00:13,  1.73it/s]

[0.17575734612695573, 0.19933951008122627]
random person
44


45it [00:14,  3.19it/s]

[0.1895222855901908, 0.20003631163280605]
random person
46
23.0





In [None]:
#stride of 30 = ~2 frames every second, so 20 frames every 10 seconds
#60 frames a second if 1:50 video, stride of 30, 221 frames,

In [17]:
prompts = ["picture of a gun","picture of no gun"]
active_prompt_list = ["picture of a gun"]
#CLASS_MAP_gun = ["gun","no_gun"]

In [17]:
from tqdm import tqdm

results = []

print(os.path.exists(VIDEO))
num_frames = 0
for frame in tqdm(sv.get_video_frames_generator(source_path=VIDEO, stride=10, start=0)):
    label = classify_image(frame)
    num_frames += 1
    results.append(label)

video_length = 10 * len(results)

video_length = video_length / 60

print(f"Does this video contain a {ACTIVE_PROMPT}?", "yes" if ACTIVE_PROMPT.lower() in results else "no")

print(num_frames)

if ACTIVE_PROMPT in results:
    print(f"When does the {ACTIVE_PROMPT} first appear?", round(results.index(ACTIVE_PROMPT) * 10 / 60, 0), "seconds")

print(f"For how long is the {ACTIVE_PROMPT} visible?", round(results.count(ACTIVE_PROMPT) * 10 / 60, 0), "seconds")

True


AttributeError: module 'supervision' has no attribute 'get_video_frames_generator'

In [None]:
import cv2
print(cv2.__version__)


4.8.0
