# Caricamento dei modelli:

### 1) SAM2

In [1]:
import shutil
import matplotlib
from groundingdino.util.inference import load_model, load_image, predict, annotate

import numpy as np
import torchvision.transforms as transforms
#from sklearn.ensemble import IsolationForest
from torchvision.ops import box_convert
import os
from scipy.ndimage import generic_filter
import utility

# if using Apple MPS, fall back to CPU for unsupported ops
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
import torch
import torchvision
import cv2
from matplotlib import pyplot as plt
from PIL import Image
from sam2.build_sam import build_sam2, build_sam2_video_predictor
from sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator, SAM2ImagePredictor



In [2]:
# Choose the compute backend in order of preference: CUDA, then Apple MPS, then CPU.
device = torch.device(
    "cuda" if torch.cuda.is_available()
    else "mps" if torch.backends.mps.is_available()
    else "cpu"
)
print(f"using device: {device}")

def check_bfloat16_cublas_support():
    """Report whether bfloat16 batched matmuls run on the current CUDA device.

    Two chained ``torch.bmm`` calls are executed on bfloat16 CUDA tensors,
    arranged like a small attention computation (query @ key^T, then
    softmax(weights) @ value), and the device is synchronized after each one
    so that any failure surfaces inside this function. The probe is intended
    to exercise the ``cublasGemmStridedBatchedEx`` code path.

    Returns:
        bool: True when both products complete. False when CUDA is not
        available or the error message contains ``CUBLAS_STATUS_NOT_SUPPORTED``.

    Raises:
        RuntimeError: any other runtime error is printed and re-raised.
    """
    if not torch.cuda.is_available():
        print("CUDA not available")
        return False

    # (batch size, sequence length, hidden size) of the probe tensors
    probe_shape = (8, 64, 128)

    def _bf16_randn():
        # Random tensor created on the GPU, then cast to bfloat16
        return torch.randn(*probe_shape, device="cuda").to(torch.bfloat16)

    try:
        q = _bf16_randn()
        k = _bf16_randn()
        v = _bf16_randn()

        # First batched matmul: query @ key.transpose(-2, -1)
        weights = torch.bmm(q, k.transpose(1, 2))
        torch.cuda.synchronize()

        # Second batched matmul: softmax(weights) @ value; only success matters
        torch.bmm(weights.softmax(dim=-1), v)
        torch.cuda.synchronize()
    except RuntimeError as err:
        message = str(err)
        if "CUBLAS_STATUS_NOT_SUPPORTED" not in message:
            print(f"Other error occurred: {message}")
            raise
        print(f"bfloat16 not fully supported: {message}")
        return False

    print("GPU fully supports bfloat16 in cublasGemmStridedBatchedEx")
    return True

if device.type == "mps":
    print(
        "\nSupport for MPS devices is preliminary. SAM 2 is trained with CUDA and might "
        "give numerically different outputs and sometimes degraded performance on MPS. "
        "See e.g. https://github.com/pytorch/pytorch/issues/84936 for a discussion."
    )
elif device.type == "cuda":
    # Pick the autocast dtype from the bfloat16 probe, then enter the autocast
    # context once. The context is entered and never exited on purpose, so it
    # stays active for the rest of the notebook.
    if check_bfloat16_cublas_support():
        autocast_dtype = torch.bfloat16
        print("Using bfloat16 precision")
    else:
        # Fallback for GPUs that fail the probe (the original note mentions older cards such as Titan X)
        autocast_dtype = torch.float32
        print("bfloat16 not supported, using float32 instead")
    torch.autocast("cuda", dtype=autocast_dtype).__enter__()

    # turn on tfloat32 for Ampere GPUs (compute capability major >= 8)
    # (https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices)
    if torch.cuda.get_device_properties(0).major >= 8:
        torch.backends.cuda.matmul.allow_tf32 = True
        torch.backends.cudnn.allow_tf32 = True

# SAM 2.1 (Hiera tiny) checkpoint and its config file, consumed by the model-building cell
sam2_checkpoint = "./models/sam2.1/sam2.1_hiera_tiny.pt"
model_cfg_path = "./configs/sam2.1/sam2.1_hiera_t.yaml"


using device: cuda
bfloat16 not fully supported: CUDA error: CUBLAS_STATUS_NOT_SUPPORTED when calling `cublasGemmStridedBatchedEx(handle, opa, opb, (int)m, (int)n, (int)k, (void*)&falpha, a, CUDA_R_16BF, (int)lda, stridea, b, CUDA_R_16BF, (int)ldb, strideb, (void*)&fbeta, c, CUDA_R_16BF, (int)ldc, stridec, (int)num_batches, compute_type, CUBLAS_GEMM_DEFAULT_TENSOR_OP)`
bfloat16 not supported, using float32 instead


In [3]:
# Still-image predictor: build the SAM 2.1 network, then wrap it for point/box prompting.
sam2 = build_sam2(model_cfg_path, sam2_checkpoint, device=device)
image_predictor = SAM2ImagePredictor(sam2)

# Video predictor: built separately from the same config and checkpoint.
video_predictor = build_sam2_video_predictor(
    model_cfg_path,
    sam2_checkpoint,
    device=device,
)

### 2) Grounding DINO

In [4]:
# Prompt and thresholds handed to Grounding DINO's `predict` in the dataset cell.
# The "TRESHOLD" spelling is kept because other cells refer to these exact names.
TEXT_PROMPT = "object ."
BOX_TRESHOLD = 0.35
TEXT_TRESHOLD = 0.25

# Grounding DINO model: (config file, checkpoint file, device), passed positionally
model = load_model(
    "./configs/groundingdino/GroundingDINO_SwinT_OGC.py",
    "./models/groundingdino/groundingdino_swint_ogc.pth",
    device,
)



final text_encoder_type: bert-base-uncased


## Utilizzo su Dataset 'FOSH'


In [None]:
from utility import *  # NOTE(review): wildcard import kept as-is; every helper below is reached through the `utility.` prefix
print(os.getcwd())  # This prints the current directory

# Where to retrieve images
folder_path = "inserimento_oggetti"

# Collect the JPEG frames; the extension is compared case-insensitively
frame_names = [
    p for p in os.listdir(folder_path)
    if os.path.splitext(p)[-1].lower() in (".jpg", ".jpeg")
]
# Order frames by the integer that follows the last underscore of the file stem
frame_names.sort(key=lambda p: int(os.path.splitext(p)[0].split('_')[-1]))

# Fixed railway region (presumably xyxy pixels, like the converted Grounding DINO boxes — TODO confirm)
railway_box = np.array([0, 256, 640, 512])

# Running totals per method; the per-frame prints divide them by `n_frames`
gd_iou, h_iou, otsu_iou = 0, 0, 0
gd_dice , h_dice, otsu_dice = 0, 0, 0
gd_precision , h_precision, otsu_precision = 0, 0, 0
gd_recall, h_recall, otsu_recall = 0, 0, 0

n_frames = 0   # frames actually evaluated
skipped = 0    # frames skipped (unreadable files or object too small)

try:
    for p in frame_names:
        print('----------------------')
        image = cv2.imread(os.path.join(folder_path, p))
        ground_truth = cv2.imread(utility.find_corresponding_segmentation(p, 'segmentazione_oggetti'), cv2.IMREAD_GRAYSCALE)

        # cv2.imread returns None (it does not raise) when a file cannot be read
        if image is None or ground_truth is None:
            print("Could not read image or ground truth for:", p)
            skipped += 1
            continue

        # Ignore frames whose ground-truth object covers fewer than 170 pixels
        if cv2.countNonZero(ground_truth) < 170:
            print("oggetto troppo piccolo")
            skipped += 1
            continue

        if n_frames % 20 == 0:
            print("frame saltati: ", skipped)

        name = str(p)
        print("Analyzing image:", name)
        print()
        # cv2.imread yields BGR, while SAM2's image predictor expects RGB for
        # ndarray input; `image` itself stays BGR for the `utility.show_masks` calls.
        image_predictor.set_image(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

        object_mask_gd, m_h, m_l = None, None, None
        n_frames += 1

        ### GROUNDING DINO + SAM 2 METHOD

        # Finding objects in the bounding box with Grounding DINO
        image_source, gd_image = load_image(os.path.join(folder_path, name))

        gd_boxes, logits, phrases = predict(
            model=model,
            image=gd_image,
            caption=TEXT_PROMPT,
            box_threshold=BOX_TRESHOLD,
            text_threshold=TEXT_TRESHOLD,
            device=device,
        )

        # Scale the normalised cxcywh boxes to pixels, then convert them to xyxy
        h, w, _ = image_source.shape
        gd_boxes = gd_boxes * torch.Tensor([w, h, w, h])
        gd_boxes = box_convert(boxes=gd_boxes, in_fmt="cxcywh", out_fmt="xyxy").numpy()

        # NOTE(review): metrics are accumulated once per accepted box but averaged
        # per frame below, so a frame with several accepted boxes counts several
        # times — confirm this is intended.
        for box in gd_boxes:
            if utility.is_contained(box, railway_box,1):
                object_mask_gd, score_gd, _ = image_predictor.predict(
                    box=box,
                    multimask_output=False,
                )
                # Save the debug image
                utility.show_masks(image, object_mask_gd, score_gd,borders=False,show=False,savefig=True,save_path="debug/gd2/",
                                   save_name=name,box_coords=box)

                statistics_map_gd = utility.segmentation_metrics(object_mask_gd, ground_truth)
                gd_iou += statistics_map_gd.get('IoU', 0)
                gd_dice += statistics_map_gd.get('Dice', 0)
                gd_precision += statistics_map_gd.get('Precision', 0)
                gd_recall += statistics_map_gd.get('Recall', 0)

        print("Grounding Dino - frame: ",n_frames, ", IoU: ", gd_iou/n_frames, ", Dice: ", gd_dice/n_frames,
              ", Precision: ", gd_precision/n_frames, ", Recall: ", gd_recall/n_frames)

        ### HOLES METHOD

        # Creating a grid of points for the railway's mask
        points ,labels = utility.create_grid(railway_box, points_per_row=[3, 4, 5])

        masks, scores, logits = image_predictor.predict(
            point_coords=points,
            point_labels=labels,
            #box=railway_box,
            multimask_output=False,
            return_logits=True,
        )
        # Sort by descending score
        sorted_ind = np.argsort(scores)[::-1]
        masks = masks[sorted_ind]
        scores = scores[sorted_ind]
        logits = logits[sorted_ind]

        # Displaying the image with the grid approach, use the parameters to show or save
        utility.show_masks(image,masks>0,scores,input_labels=labels,point_coords=points,show=False,savefig=True,
                           save_path='debug/grid/',save_name=p)

        # Clamp negative logits to zero. np.squeeze returns a view, so this also
        # modifies `masks`; it therefore has to stay after the show_masks call above.
        railway_logits = np.squeeze(masks)
        railway_logits[railway_logits < 0] = 0

        sigmoid_mask = utility.sigmoid(railway_logits)  # presumably values in [0, 1] — helper not visible here

        # Convert to uint8 (0-255 range for OpenCV)
        sigmoid_mask_2d = (sigmoid_mask * 255).astype(np.uint8)

        sig = (sigmoid_mask_2d > 127).astype(np.uint8)  # binarize the mask to search for holes

        coord_holes, holes_labels = utility.find_holes(sig, 100)

        if len(coord_holes)>0:

            m_h,s_h,_ = image_predictor.predict(
                point_coords=coord_holes,
                point_labels=holes_labels,
                multimask_output=False,
            )
            utility.show_masks(image, m_h, s_h, borders=False, show=False, savefig=True, point_coords=coord_holes,
                               input_labels=holes_labels, save_path='debug/holes/', save_name=name)

            d_h = utility.segmentation_metrics(m_h, ground_truth)
            h_iou += d_h.get('IoU', 0)
            h_dice += d_h.get('Dice', 0)
            h_precision += d_h.get('Precision', 0)
            h_recall += d_h.get('Recall', 0)

        print("Holes - frame: ", n_frames, ", IoU: ", h_iou / n_frames, ", Dice: ", h_dice / n_frames, ", Precision: ", h_precision / n_frames, ", Recall: ", h_recall / n_frames)

        ###  OTSU METHOD

        # Threshold the railway-mask values with Otsu's method instead of the fixed cut-off used above
        thresh_val, binary_mask = cv2.threshold(sigmoid_mask_2d, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

        # Map the 0/255 output to 0/1
        binary_mask[binary_mask > 127] = 1

        if binary_mask.dtype != np.uint8:
            binary_mask = binary_mask.astype(np.uint8)

        coord_holes, o_labels = utility.find_holes(binary_mask, 100)
        if len(coord_holes) > 0:
            m_o, s_o, _ = image_predictor.predict(
                point_coords=coord_holes,
                point_labels=o_labels,
                multimask_output=False,
            )
            utility.show_masks(image, m_o, np.array([0]), borders=False, point_coords=coord_holes, input_labels=o_labels,
                               show=False, savefig=True, save_path='debug/otzu/', save_name=name)

            d_o = utility.segmentation_metrics(m_o, ground_truth)
            otsu_iou += d_o.get('IoU', 0)
            otsu_dice += d_o.get('Dice', 0)
            otsu_precision += d_o.get('Precision', 0)
            otsu_recall += d_o.get('Recall', 0)

        print("Otsu - frame: ", n_frames, ", IoU: ", otsu_iou / n_frames, ", Dice: ", otsu_dice / n_frames,
              ", Precision: ", otsu_precision / n_frames, ", Recall: ", otsu_recall / n_frames)
except (KeyboardInterrupt, SystemExit):
    print("Exiting...")
finally:
    # Raw (un-averaged) totals, reported on normal completion as well as on interrupt
    print('iou', gd_iou, h_iou, otsu_iou)
    print('dice', gd_dice, h_dice, otsu_dice)
    print('precision', gd_precision, h_precision, otsu_precision)
    print('recall', gd_recall, h_recall, otsu_recall)
    print('n_frames ', n_frames)
    print('skipped ',skipped)

## Anomaly detection (Grounding DINO + SAM2)


In [5]:
# Folder read by the frame-based cell (its JPEG files are listed and segmented)
video_dir = "test_video/test_video_san_donato/"
# Folder that receives the segmented frames
segmented_video_dir = "test_video/output"


# Frame-based processing

In [None]:
try:
    # The segmented frames are written here; make sure the folder exists
    os.makedirs(segmented_video_dir, exist_ok=True)

    # Number of propagated frames between two Grounding DINO checks for new objects
    detection_interval = 15

    # Extract frames (extension compared case-insensitively)
    frame_names = [
        p for p in os.listdir(video_dir)
        if os.path.splitext(p)[-1].lower() in (".jpg", ".jpeg")
    ]
    # Sort frames by the integer that follows the last underscore of the file stem
    frame_names.sort(key=lambda p: int(os.path.splitext(p)[0].split('_')[-1]))
    # Setting the inference_state
    inference_state = video_predictor.init_state(video_path=video_dir)

    ann_frame_idx = 0  # the frame index we interact with
    ann_obj_id = 1  # give a unique id to each object we interact with (it can be any integers)

    # Finding the railway and objects bounding box with Grounding DINO on the first frame
    dino_boxes, phrases, dino_score = utility.grounding_Dino_analyzer(
        os.path.join(video_dir, frame_names[0]), model, 'railway . object .', device)
    max_score_railway = 0
    railway_box = None
    object_points = []

    # FIXME we should check if those bounding box objects are inside the railway box, consider using utility.is_contained()
    for i, phrase in enumerate(phrases):
        # Keep the highest-scoring 'railway' box
        if phrase == 'railway' and dino_score[i] > max_score_railway:
            railway_box = dino_boxes[i]
            max_score_railway = dino_score[i]
        # Every 'object' box is reduced to its centre point
        if phrase == 'object':
            x_min, y_min, x_max, y_max = dino_boxes[i]
            x_center = (x_min + x_max) // 2
            y_center = (y_min + y_max) // 2
            object_points.append([x_center, y_center])

    # Without a railway box there is nothing to prompt SAM2 with for object id 1
    if railway_box is None:
        raise RuntimeError("Grounding DINO did not find a 'railway' box in the first frame")

    # Setting the railway mask
    _, out_obj_ids, out_mask_logits = video_predictor.add_new_points_or_box(
        inference_state=inference_state,
        frame_idx=ann_frame_idx,
        obj_id=ann_obj_id,
        box=railway_box,
    )
    id_objects = []

    # Setting the objects found in the first frame if any
    for obj_point in object_points:
        print('obj_point ', obj_point)
        ann_obj_id += 1
        id_objects.append(ann_obj_id)
        _, out_obj_ids, out_mask_logits = video_predictor.add_new_points_or_box(
            inference_state=inference_state,
            frame_idx=ann_frame_idx,
            obj_id=ann_obj_id,
            points=[obj_point],
            labels= np.array([1], np.int32),
        )

    idx_frame = 0
    video_segments = {}  # video_segments contains the per-frame segmentation results

    while idx_frame < len(frame_names):
        chunk_start = idx_frame  # first frame handled by this propagation pass
        last_masks = {}

        # Starting the inference propagation in future frames
        for out_frame_idx, out_obj_ids, out_mask_logits in video_predictor.propagate_in_video(inference_state, start_frame_idx=idx_frame):

            video_segments[out_frame_idx] = {
                out_obj_id: (out_mask_logits[i] > 0).cpu().numpy()
                for i, out_obj_id in enumerate(out_obj_ids)
            }

            idx_frame += 1

            # Every `detection_interval` frames we stop the propagation to check for newer objects
            if idx_frame % detection_interval == 0:
                last_masks = dict(video_segments[out_frame_idx])
                break

        # Defensive: if propagation produced no frame, stop instead of looping forever
        if idx_frame == chunk_start:
            break

        # Saving exactly the frames propagated in this pass. The previous fixed
        # `idx_frame - 15` start went negative for clips shorter than 15 frames
        # and re-saved frames on a short final chunk.
        for idx_frame_proc in range(chunk_start, idx_frame):
            plt.close('all')
            plt.imshow(Image.open(os.path.join(video_dir, frame_names[idx_frame_proc])))

            for out_obj_id, out_mask in video_segments[idx_frame_proc].items():
                utility.show_mask_v(out_mask, plt.gca(), obj_id=out_obj_id)
            plt.savefig(os.path.join(segmented_video_dir, frame_names[idx_frame_proc]),
                        bbox_inches='tight', pad_inches=0)
        plt.close('all')

        # No frame left to propagate into: a new prompt would be placed on a
        # frame index that does not exist, so skip the detection step.
        if idx_frame >= len(frame_names):
            break

        # Using Grounding DINO on the last propagated frame
        obj_boxes, _, obj_scores = utility.grounding_Dino_analyzer(
            os.path.join(video_dir, frame_names[idx_frame - 1]), model, 'object .', device)

        # Keep only the boxes that do not match a mask we are already following
        new_boxes = [
            obj_box for obj_box in obj_boxes
            if not any(utility.is_mask_in_box(mask, obj_box) for mask in last_masks.values())
        ]

        # If we find new objects we add them using the same inference_state, then we can restart the propagation process.
        # NOTE(review): the boxes come from frame idx_frame-1 while the prompt is
        # placed on frame idx_frame, where propagation resumes — confirm this is intended.
        for obj_box in new_boxes:
            print("new box ", obj_box)
            ann_obj_id += 1

            x_min, y_min, x_max, y_max = obj_box
            x_center = (x_min + x_max) // 2
            y_center = (y_min + y_max) // 2

            _, out_obj_ids, out_mask_logits = video_predictor.add_new_points_or_box(
                inference_state=inference_state,
                frame_idx= idx_frame,
                obj_id=ann_obj_id,
                points=[[x_center, y_center]],
                labels=np.array([1], np.int32),
            )
except (KeyboardInterrupt, SystemExit):
    print("Exiting...")

# Video-based processing

Refactored version that reduces CUDA memory usage by streaming the video one frame at a time.
Each frame gets a fresh SAM2 inference state, and tracked objects are carried over by re-prompting SAM2 from the masks of the previous frame, so earlier frames are never reprocessed.

In [7]:
import os
import torch
import numpy as np
import cv2
import matplotlib.pyplot as plt
import gc
import time

# --- Paths ---------------------------------------------------------------
video_path = "test_video/ir_2025-01-31_05-09-12.mp4"  # Can be camera index (0, 1) for webcam
segmented_video_dir = "test_video/output"
temp_dir = "temp_frames"  # scratch folder holding the single frame handed to SAM2

# --- Grounding DINO thresholds for this cell --------------------------------
# These reassign the notebook-level names set in the Grounding DINO cell.
BOX_TRESHOLD = 0.22  # Threshold for Grounding DINO box detection
TEXT_TRESHOLD = 0.18  # Threshold for Grounding DINO text detection

# Both folders are created on demand
os.makedirs(segmented_video_dir, exist_ok=True)
os.makedirs(temp_dir, exist_ok=True)

# --- Video stream -------------------------------------------------------------
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise Exception(f"Could not open video stream: {video_path}")

# Frame rate and frame size as reported by OpenCV
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
print(f"Video stream at {fps} FPS, resolution: {width}x{height}")

# --- Tracking state shared across frames ----------------------------------------
ann_obj_id = 1      # Object ID counter (id 1 is used for the railway)
last_masks = {}     # Last known mask for each object id
frame_idx = 0       # Index of the frame being processed
railway_box = None  # Railway box found on the first frame, reused afterwards

def _as_2d_mask(mask):
    """Reduce a stored mask to a 2-D array: squeeze it, then take the first slice if it is still >2-D."""
    mask_array = np.asarray(mask)
    if mask_array.ndim > 2:
        mask_array = mask_array.squeeze()
        if mask_array.ndim > 2:
            mask_array = mask_array[0]
    return mask_array


def _mask_prompt_point(mask):
    """Return an (x, y) pixel lying on the mask, or None when the mask is empty.

    The pixel returned is the mask pixel closest to the centre of mass, so the
    prompt stays on the object even when the centroid of a concave or
    fragmented mask falls outside of it.
    """
    y_indices, x_indices = np.where(_as_2d_mask(mask) > 0)
    if len(y_indices) == 0:
        return None
    squared_dist = (y_indices - y_indices.mean()) ** 2 + (x_indices - x_indices.mean()) ** 2
    nearest = int(np.argmin(squared_dist))
    return int(x_indices[nearest]), int(y_indices[nearest])


def _propagate_current_frame(state):
    """Run SAM2 on the single frame held by `state` and return {obj_id: boolean mask}."""
    _, obj_ids, mask_logits = next(video_predictor.propagate_in_video(
        state,
        start_frame_idx=0
    ))
    return {
        obj_id: (mask_logits[k] > 0).cpu().numpy()
        for k, obj_id in enumerate(obj_ids)
    }


try:
    while True:
        print(f"\n--- Processing frame {frame_idx} ---")
        start_time = time.time()

        # Read the next frame
        success, frame = cap.read()
        if not success:
            print("End of stream or error reading frame")
            break

        # Clear temp directory
        for file in os.listdir(temp_dir):
            os.remove(os.path.join(temp_dir, file))

        # Save current frame to temp directory (SAM2's init_state reads frames from a folder)
        frame_path = os.path.join(temp_dir, "000000.jpg")
        if not cv2.imwrite(frame_path, frame):
            raise IOError(f"Could not write frame to {frame_path}")

        # Convert to RGB for visualization
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Initialize new state for this frame
        torch.cuda.empty_cache()
        gc.collect()
        inference_state = video_predictor.init_state(video_path=temp_dir)

        # Number of prompts registered on this frame's state. SAM2 refuses to
        # propagate when no prompt was added, so this is checked before propagating.
        n_prompts = 0

        # Process based on frame index
        if frame_idx == 0:
            # First frame: detect railway and objects with Grounding DINO
            dino_boxes, phrases, dino_scores = utility.grounding_Dino_analyzer(
                frame_path, model, 'railway . object .', device, BOX_TRESHOLD=BOX_TRESHOLD, TEXT_TRESHOLD=TEXT_TRESHOLD
            )

            # Keep the best-scoring railway box and the centre of every object box
            max_score_railway = 0
            object_points = []

            for det_idx, phrase in enumerate(phrases):
                if phrase == 'railway' and dino_scores[det_idx] > max_score_railway:
                    railway_box = dino_boxes[det_idx]
                    max_score_railway = dino_scores[det_idx]
                elif phrase == 'object':
                    x_min, y_min, x_max, y_max = dino_boxes[det_idx]
                    object_points.append([(x_min + x_max) // 2, (y_min + y_max) // 2])

            print(f"Found railway: {railway_box is not None}, objects: {len(object_points)}")

            # Add railway to tracking
            if railway_box is not None:
                _, out_obj_ids, out_mask_logits = video_predictor.add_new_points_or_box(
                    inference_state=inference_state,
                    frame_idx=0,
                    obj_id=ann_obj_id,
                    box=railway_box,
                )
                n_prompts += 1

                # Store railway mask
                last_masks[ann_obj_id] = (out_mask_logits[0] > 0).cpu().numpy()

            # Add detected objects to tracking
            for obj_point in object_points:
                ann_obj_id += 1
                _, out_obj_ids, out_mask_logits = video_predictor.add_new_points_or_box(
                    inference_state=inference_state,
                    frame_idx=0,
                    obj_id=ann_obj_id,
                    points=[obj_point],
                    labels=np.array([1], np.int32),
                )
                n_prompts += 1

                # Store object mask
                idx = list(out_obj_ids).index(ann_obj_id) if ann_obj_id in out_obj_ids else 0
                last_masks[ann_obj_id] = (out_mask_logits[idx] > 0).cpu().numpy()
        else:
            # For non-first frames, transfer objects from the previous frame by re-prompting
            for tracked_id, tracked_mask in last_masks.items():
                prompt_point = _mask_prompt_point(tracked_mask)
                if prompt_point is None:
                    # Empty mask: nothing to prompt with; the stored mask is left as it is
                    continue

                # Special handling for railway (can use box instead of point)
                if tracked_id == 1 and railway_box is not None:
                    video_predictor.add_new_points_or_box(
                        inference_state=inference_state,
                        frame_idx=0,
                        obj_id=tracked_id,
                        box=railway_box,
                    )
                else:
                    # Add object using a point that lies on its previous mask
                    video_predictor.add_new_points_or_box(
                        inference_state=inference_state,
                        frame_idx=0,
                        obj_id=tracked_id,
                        points=[list(prompt_point)],
                        labels=np.array([1], np.int32),
                    )
                n_prompts += 1

        # Propagate all objects in the current frame and update their masks
        if n_prompts > 0:
            last_masks.update(_propagate_current_frame(inference_state))
        else:
            print("No tracked objects to propagate in this frame")

        # Check for new objects periodically
        if frame_idx % 15 == 0 and frame_idx > 0:
            dino_boxes, phrases, _ = utility.grounding_Dino_analyzer(
                frame_path, model, 'object .', device, BOX_TRESHOLD=BOX_TRESHOLD, TEXT_TRESHOLD=TEXT_TRESHOLD
            )

            # Check each detected object
            for det_idx, phrase in enumerate(phrases):
                if phrase != 'object':
                    continue

                x_min, y_min, x_max, y_max = dino_boxes[det_idx]
                center_x = (x_min + x_max) // 2
                center_y = (y_min + y_max) // 2
                px, py = int(center_x), int(center_y)

                # An object counts as already tracked when its box centre falls inside any stored mask.
                # NOTE(review): the railway mask (id 1) takes part in this test too,
                # so an object lying on a fully segmented railway is treated as tracked — confirm.
                already_tracked = False
                for known_mask in last_masks.values():
                    mask_array = _as_2d_mask(known_mask)
                    if (0 <= py < mask_array.shape[0] and
                            0 <= px < mask_array.shape[1] and
                            mask_array[py, px]):
                        already_tracked = True
                        break

                # Add new object if not already tracked
                if not already_tracked:
                    ann_obj_id += 1
                    print(f"New object {ann_obj_id} detected at frame {frame_idx}")

                    video_predictor.add_new_points_or_box(
                        inference_state=inference_state,
                        frame_idx=0,
                        obj_id=ann_obj_id,
                        points=[[center_x, center_y]],
                        labels=np.array([1], np.int32),
                    )

                    # Re-propagate with the new object so the next detections are
                    # compared against its mask as well
                    last_masks.update(_propagate_current_frame(inference_state))

        # Create visualization
        fig = plt.figure(figsize=(8, 6))
        plt.imshow(frame_rgb)
        for shown_id, shown_mask in last_masks.items():
            utility.show_mask_v(shown_mask, plt.gca(), obj_id=shown_id)
        plt.savefig(os.path.join(segmented_video_dir, f"frame_{frame_idx:06d}.jpg"))
        plt.close(fig)

        # Calculate processing time
        processing_time = time.time() - start_time
        print(f"Frame processed in {processing_time:.2f}s")

        # Increment frame counter
        frame_idx += 1

        # Clear memory for next iteration
        del inference_state
        gc.collect()
        torch.cuda.empty_cache()

except (KeyboardInterrupt, SystemExit):
    print("Exiting gracefully...")
finally:
    # Release resources
    cap.release()

    # Remove the temp directory together with whatever it still contains
    shutil.rmtree(temp_dir, ignore_errors=True)

    print(f"Processing completed or interrupted after {frame_idx} frames")

Video stream at 25.0 FPS, resolution: 640x512

--- Processing frame 0 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 29.04it/s]


Analysis with Grounding Dino




['railway', 'object', 'object'] tensor([0.7364, 0.5972, 0.3127])
Found railway: True, objects: 2


propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.66s

--- Processing frame 1 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 30.51it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]

propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.92s

--- Processing frame 143 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.54it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.87s

--- Processing frame 144 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.96it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.85s

--- Processing frame 145 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 29.09it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.89s

--- Processing frame 146 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 27.95it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.85s

--- Processing frame 147 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.82it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.87s

--- Processing frame 148 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 27.14it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.86s

--- Processing frame 149 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 28.82it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.83s

--- Processing frame 150 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 29.20it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Analysis with Grounding Dino




['object', 'object'] tensor([0.6623, 0.3529])
Frame processed in 1.35s

--- Processing frame 151 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.05it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.90s

--- Processing frame 152 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.09it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.87s

--- Processing frame 153 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 25.69it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.84s

--- Processing frame 154 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 25.96it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.84s

--- Processing frame 155 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 26.96it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.88s

--- Processing frame 156 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 29.13it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.92s

--- Processing frame 157 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 27.68it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.86s

--- Processing frame 158 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 26.92it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.84s

--- Processing frame 159 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.10it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.85s

--- Processing frame 160 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 25.70it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.90s

--- Processing frame 161 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 27.19it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.86s

--- Processing frame 162 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 28.36it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.85s

--- Processing frame 163 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 26.64it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.83s

--- Processing frame 164 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.49it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.96s

--- Processing frame 165 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 26.47it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Analysis with Grounding Dino




['object', 'object'] tensor([0.6710, 0.3427])
Frame processed in 1.40s

--- Processing frame 166 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.19it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.88s

--- Processing frame 167 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 27.94it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.95s

--- Processing frame 168 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 27.44it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.88s

--- Processing frame 169 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.12it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.90s

--- Processing frame 170 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 28.74it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.87s

--- Processing frame 171 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.17it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.86s

--- Processing frame 172 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.42it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.86s

--- Processing frame 173 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 27.89it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.85s

--- Processing frame 174 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 27.29it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.87s

--- Processing frame 175 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.78it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.93s

--- Processing frame 176 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 27.26it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.86s

--- Processing frame 177 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 26.89it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.85s

--- Processing frame 178 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 25.46it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.86s

--- Processing frame 179 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 25.69it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.88s

--- Processing frame 180 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 26.22it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Analysis with Grounding Dino




['object', 'object'] tensor([0.6634, 0.3602])
Frame processed in 1.53s

--- Processing frame 181 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 13.58it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.15s

--- Processing frame 182 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.16it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.21s

--- Processing frame 183 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.53it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.97s

--- Processing frame 184 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 18.55it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.90s

--- Processing frame 185 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 26.11it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.87s

--- Processing frame 186 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.65it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.94s

--- Processing frame 187 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 27.01it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.86s

--- Processing frame 188 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 27.93it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.89s

--- Processing frame 189 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 28.58it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.89s

--- Processing frame 190 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 28.07it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.97s

--- Processing frame 191 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 28.71it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.95s

--- Processing frame 192 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.34it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.88s

--- Processing frame 193 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 28.58it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.90s

--- Processing frame 194 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.25it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.89s

--- Processing frame 195 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.59it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Analysis with Grounding Dino




['object', 'object'] tensor([0.7231, 0.3352])
Frame processed in 1.42s

--- Processing frame 196 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 28.92it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.88s

--- Processing frame 197 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 15.43it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.99s

--- Processing frame 198 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.30it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.92s

--- Processing frame 199 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 27.08it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.87s

--- Processing frame 200 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.90it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.89s

--- Processing frame 201 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.00it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.91s

--- Processing frame 202 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.73it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.98s

--- Processing frame 203 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 29.84it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.87s

--- Processing frame 204 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.87it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.87s

--- Processing frame 205 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 25.27it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.89s

--- Processing frame 206 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 26.84it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.87s

--- Processing frame 207 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.64it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.95s

--- Processing frame 208 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 29.90it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.89s

--- Processing frame 209 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.95it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.90s

--- Processing frame 210 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 29.51it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Analysis with Grounding Dino




['object', 'object'] tensor([0.7402, 0.3688])
Frame processed in 1.45s

--- Processing frame 211 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.05it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.94s

--- Processing frame 212 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.72it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.01s

--- Processing frame 213 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.70it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.98s

--- Processing frame 214 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 27.33it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.90s

--- Processing frame 215 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 30.45it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.92s

--- Processing frame 216 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.57it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.93s

--- Processing frame 217 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 25.23it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.89s

--- Processing frame 218 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.14it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.89s

--- Processing frame 219 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 26.01it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.89s

--- Processing frame 220 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.58it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.94s

--- Processing frame 221 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 28.35it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.89s

--- Processing frame 222 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.51it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.89s

--- Processing frame 223 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.08it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.90s

--- Processing frame 224 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 27.35it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.93s

--- Processing frame 225 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 25.02it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Analysis with Grounding Dino




['object', 'object'] tensor([0.6881, 0.4214])
Frame processed in 1.46s

--- Processing frame 226 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.30it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.96s

--- Processing frame 227 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 28.31it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.89s

--- Processing frame 228 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 27.98it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.90s

--- Processing frame 229 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 27.20it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.98s

--- Processing frame 230 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 29.79it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.94s

--- Processing frame 231 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.13it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.96s

--- Processing frame 232 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 29.29it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.90s

--- Processing frame 233 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 28.71it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.91s

--- Processing frame 234 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.99it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.01s

--- Processing frame 235 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 29.71it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.91s

--- Processing frame 236 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.34it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.98s

--- Processing frame 237 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.61it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.94s

--- Processing frame 238 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.67it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.92s

--- Processing frame 239 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.73it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.96s

--- Processing frame 240 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.68it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Analysis with Grounding Dino




['object', 'object', 'object'] tensor([0.6791, 0.4001, 0.2458])
New object 4 detected at frame 240


propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.54s

--- Processing frame 241 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.16it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.09s

--- Processing frame 242 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.06it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.11s

--- Processing frame 243 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.79it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.03s

--- Processing frame 244 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 29.28it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.01s

--- Processing frame 245 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 30.31it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.05s

--- Processing frame 246 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 25.06it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.06s

--- Processing frame 247 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.10it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.06s

--- Processing frame 248 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.94it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.04s

--- Processing frame 249 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.28it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 0.99s

--- Processing frame 250 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 27.14it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.05s

--- Processing frame 251 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.63it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.06s

--- Processing frame 252 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.84it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.01s

--- Processing frame 253 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 27.30it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.04s

--- Processing frame 254 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 27.30it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.03s

--- Processing frame 255 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.58it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Analysis with Grounding Dino




['object', 'object', 'object', 'object'] tensor([0.6526, 0.4391, 0.2218, 0.2294])
New object 5 detected at frame 255


propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.67s

--- Processing frame 256 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.97it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.19s

--- Processing frame 257 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 27.79it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.14s

--- Processing frame 258 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.14it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.19s

--- Processing frame 259 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 27.03it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.11s

--- Processing frame 260 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 25.63it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.17s

--- Processing frame 261 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.60it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.17s

--- Processing frame 262 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.50it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.18s

--- Processing frame 263 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.16it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.18s

--- Processing frame 264 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 26.64it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.16s

--- Processing frame 265 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.95it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.18s

--- Processing frame 266 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.58it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.24s

--- Processing frame 267 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 27.63it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.12s

--- Processing frame 268 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.33it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.24s

--- Processing frame 269 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 25.65it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.13s

--- Processing frame 270 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.45it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Analysis with Grounding Dino




['object', 'object', 'object'] tensor([0.6078, 0.3432, 0.2734])
Frame processed in 1.70s

--- Processing frame 271 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 28.40it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.12s

--- Processing frame 272 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 26.10it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.19s

--- Processing frame 273 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.47it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.18s

--- Processing frame 274 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 29.01it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.15s

--- Processing frame 275 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.54it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.15s

--- Processing frame 276 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 27.61it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.11s

--- Processing frame 277 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 25.68it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.21s

--- Processing frame 278 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 26.50it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.18s

--- Processing frame 279 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.91it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.19s

--- Processing frame 280 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 26.87it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.12s

--- Processing frame 281 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.67it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.22s

--- Processing frame 282 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.25it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.13s

--- Processing frame 283 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 25.22it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.14s

--- Processing frame 284 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.18it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.17s

--- Processing frame 285 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.71it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Analysis with Grounding Dino




['object', 'object'] tensor([0.6752, 0.4514])
Frame processed in 1.70s

--- Processing frame 286 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.23it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.19s

--- Processing frame 287 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.63it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.24s

--- Processing frame 288 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.24it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.23s

--- Processing frame 289 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.39it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.13s

--- Processing frame 290 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.99it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.21s

--- Processing frame 291 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 15.36it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.24s

--- Processing frame 292 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.19it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.25s

--- Processing frame 293 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.61it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.23s

--- Processing frame 294 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.74it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.15s

--- Processing frame 295 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.33it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.16s

--- Processing frame 296 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.65it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.16s

--- Processing frame 297 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.90it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.23s

--- Processing frame 298 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.62it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.22s

--- Processing frame 299 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.63it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.20s

--- Processing frame 300 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.40it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Analysis with Grounding Dino




['object', 'object'] tensor([0.6931, 0.4704])
Frame processed in 1.77s

--- Processing frame 301 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 15.26it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.28s

--- Processing frame 302 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.36it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.21s

--- Processing frame 303 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.59it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.26s

--- Processing frame 304 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.24it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.21s

--- Processing frame 305 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.48it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.28s

--- Processing frame 306 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.73it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.17s

--- Processing frame 307 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 25.20it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.20s

--- Processing frame 308 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.57it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.25s

--- Processing frame 309 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.42it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.16s

--- Processing frame 310 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.66it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.20s

--- Processing frame 311 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.69it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.27s

--- Processing frame 312 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 25.86it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.13s

--- Processing frame 313 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.23it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.19s

--- Processing frame 314 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 27.05it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.23s

--- Processing frame 315 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.27it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Analysis with Grounding Dino




['object', 'object'] tensor([0.7001, 0.3267])
Frame processed in 1.76s

--- Processing frame 316 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 26.32it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.17s

--- Processing frame 317 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.27it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.27s

--- Processing frame 318 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 25.51it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.25s

--- Processing frame 319 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.87it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.25s

--- Processing frame 320 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.53it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.19s

--- Processing frame 321 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.44it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.20s

--- Processing frame 322 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.18it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.18s

--- Processing frame 323 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 17.75it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.28s

--- Processing frame 324 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.09it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.20s

--- Processing frame 325 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 25.88it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.17s

--- Processing frame 326 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.03it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.23s

--- Processing frame 327 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.35it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.21s

--- Processing frame 328 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.17it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.27s

--- Processing frame 329 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 17.72it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.28s

--- Processing frame 330 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.80it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Analysis with Grounding Dino




['object', 'object'] tensor([0.6766, 0.3979])
Frame processed in 1.70s

--- Processing frame 331 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.53it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.17s

--- Processing frame 332 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 25.28it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.19s

--- Processing frame 333 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.64it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.26s

--- Processing frame 334 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.00it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.18s

--- Processing frame 335 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.71it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.18s

--- Processing frame 336 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.02it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.19s

--- Processing frame 337 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.21it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.25s

--- Processing frame 338 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.82it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.20s

--- Processing frame 339 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 14.48it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.20s

--- Processing frame 340 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.20it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.26s

--- Processing frame 341 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.53it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.17s

--- Processing frame 342 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 12.31it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.40s

--- Processing frame 343 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.18it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.29s

--- Processing frame 344 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.84it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.22s

--- Processing frame 345 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.03it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Analysis with Grounding Dino




['object', 'object'] tensor([0.6410, 0.2946])
Frame processed in 1.81s

--- Processing frame 346 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 18.69it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.24s

--- Processing frame 347 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.70it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.26s

--- Processing frame 348 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.96it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.22s

--- Processing frame 349 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 25.55it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.21s

--- Processing frame 350 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.79it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.19s

--- Processing frame 351 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.14it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.29s

--- Processing frame 352 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 25.55it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.18s

--- Processing frame 353 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.78it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.25s

--- Processing frame 354 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.16it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.27s

--- Processing frame 355 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.82it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.27s

--- Processing frame 356 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.33it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.32s

--- Processing frame 357 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.97it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.31s

--- Processing frame 358 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 25.25it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.17s

--- Processing frame 359 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.48it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.20s

--- Processing frame 360 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.95it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Analysis with Grounding Dino




['object', 'object', 'object'] tensor([0.6145, 0.2296, 0.2991])
Frame processed in 1.75s

--- Processing frame 361 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.24it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.25s

--- Processing frame 362 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.80it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.26s

--- Processing frame 363 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.94it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.27s

--- Processing frame 364 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.97it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.29s

--- Processing frame 365 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.27it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.27s

--- Processing frame 366 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.87it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.30s

--- Processing frame 367 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 25.69it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.16s

--- Processing frame 368 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.58it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.16s

--- Processing frame 369 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.27it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.17s

--- Processing frame 370 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.59it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.24s

--- Processing frame 371 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.71it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.21s

--- Processing frame 372 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.16it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.29s

--- Processing frame 373 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 25.14it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.22s

--- Processing frame 374 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 25.04it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.25s

--- Processing frame 375 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.92it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Analysis with Grounding Dino




['object', 'object'] tensor([0.6855, 0.3884])
Frame processed in 1.82s

--- Processing frame 376 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.10it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.28s

--- Processing frame 377 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.63it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.26s

--- Processing frame 378 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.94it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.19s

--- Processing frame 379 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.87it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.25s

--- Processing frame 380 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.81it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.33s

--- Processing frame 381 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.65it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.25s

--- Processing frame 382 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.90it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.24s

--- Processing frame 383 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.38it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.28s

--- Processing frame 384 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.50it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.29s

--- Processing frame 385 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 26.14it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.26s

--- Processing frame 386 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.41it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.34s

--- Processing frame 387 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.69it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.19s

--- Processing frame 388 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 25.23it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.26s

--- Processing frame 389 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.79it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.24s

--- Processing frame 390 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.89it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Analysis with Grounding Dino




['object', 'object'] tensor([0.6931, 0.3817])
Frame processed in 1.79s

--- Processing frame 391 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.98it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.20s

--- Processing frame 392 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.33it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.30s

--- Processing frame 393 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.63it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.32s

--- Processing frame 394 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.29it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.24s

--- Processing frame 395 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 17.01it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.22s

--- Processing frame 396 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.63it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.27s

--- Processing frame 397 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.89it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.36s

--- Processing frame 398 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.13it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.25s

--- Processing frame 399 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 15.32it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.27s

--- Processing frame 400 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.24it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.21s

--- Processing frame 401 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.52it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.27s

--- Processing frame 402 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.57it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.24s

--- Processing frame 403 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.33it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.23s

--- Processing frame 404 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.50it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.27s

--- Processing frame 405 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.09it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Analysis with Grounding Dino




['object', 'object', 'object'] tensor([0.6282, 0.3817, 0.2524])
Frame processed in 1.82s

--- Processing frame 406 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.24it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.21s

--- Processing frame 407 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.88it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.30s

--- Processing frame 408 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.32it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.27s

--- Processing frame 409 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.54it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.23s

--- Processing frame 410 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.08it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.32s

--- Processing frame 411 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.83it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.23s

--- Processing frame 412 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.33it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.25s

--- Processing frame 413 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.03it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.28s

--- Processing frame 414 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.01it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.25s

--- Processing frame 415 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.06it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.26s

--- Processing frame 416 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.12it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.26s

--- Processing frame 417 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.18it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.27s

--- Processing frame 418 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 15.45it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.32s

--- Processing frame 419 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.86it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.46s

--- Processing frame 420 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 16.83it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Analysis with Grounding Dino




['object', 'object'] tensor([0.6495, 0.3595])
Frame processed in 1.82s

--- Processing frame 421 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.86it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.36s

--- Processing frame 422 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.48it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.33s

--- Processing frame 423 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.27it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.31s

--- Processing frame 424 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.20it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.29s

--- Processing frame 425 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.92it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.30s

--- Processing frame 426 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.00it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.33s

--- Processing frame 427 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.89it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.27s

--- Processing frame 428 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.66it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.31s

--- Processing frame 429 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.83it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.24s

--- Processing frame 430 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.52it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.29s

--- Processing frame 431 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.61it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.28s

--- Processing frame 432 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 18.66it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.33s

--- Processing frame 433 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 18.43it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.30s

--- Processing frame 434 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.74it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.34s

--- Processing frame 435 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.17it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Analysis with Grounding Dino




['object', 'object'] tensor([0.6819, 0.3825])
Frame processed in 1.79s

--- Processing frame 436 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.55it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.36s

--- Processing frame 437 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.75it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.38s

--- Processing frame 438 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.75it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.28s

--- Processing frame 439 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.82it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.30s

--- Processing frame 440 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.32it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.28s

--- Processing frame 441 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.44it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.33s

--- Processing frame 442 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.10it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.37s

--- Processing frame 443 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.89it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.33s

--- Processing frame 444 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.34it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.34s

--- Processing frame 445 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.86it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.30s

--- Processing frame 446 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.91it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.97s

--- Processing frame 447 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 26.40it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.29s

--- Processing frame 448 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 25.28it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.33s

--- Processing frame 449 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.76it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.26s

--- Processing frame 450 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.29it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Analysis with Grounding Dino




['object', 'object'] tensor([0.6433, 0.4308])
Frame processed in 1.80s

--- Processing frame 451 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 25.30it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.23s

--- Processing frame 452 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 15.32it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.37s

--- Processing frame 453 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.56it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.27s

--- Processing frame 454 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.56it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.29s

--- Processing frame 455 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.29it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.29s

--- Processing frame 456 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.51it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.31s

--- Processing frame 457 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.24it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.36s

--- Processing frame 458 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.46it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.35s

--- Processing frame 459 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.46it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.33s

--- Processing frame 460 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.68it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.26s

--- Processing frame 461 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.43it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.26s

--- Processing frame 462 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.57it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.34s

--- Processing frame 463 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 25.31it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.25s

--- Processing frame 464 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.04it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.43s

--- Processing frame 465 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.59it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Analysis with Grounding Dino




['object', 'object'] tensor([0.6412, 0.3498])
Frame processed in 1.74s

--- Processing frame 466 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.65it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.36s

--- Processing frame 467 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.04it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.30s

--- Processing frame 468 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.74it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.46s

--- Processing frame 469 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 15.79it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.36s

--- Processing frame 470 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.62it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.29s

--- Processing frame 471 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 18.87it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.36s

--- Processing frame 472 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.30it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.27s

--- Processing frame 473 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.76it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.32s

--- Processing frame 474 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.20it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.26s

--- Processing frame 475 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.29it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.36s

--- Processing frame 476 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.27it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.32s

--- Processing frame 477 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 18.95it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.33s

--- Processing frame 478 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 18.76it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.33s

--- Processing frame 479 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.94it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.28s

--- Processing frame 480 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 15.56it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Analysis with Grounding Dino




['object', 'object'] tensor([0.6502, 0.4319])
Frame processed in 1.94s

--- Processing frame 481 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.37it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.29s

--- Processing frame 482 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.81it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.33s

--- Processing frame 483 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.53it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.37s

--- Processing frame 484 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.87it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.26s

--- Processing frame 485 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.30it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.38s

--- Processing frame 486 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 13.58it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.44s

--- Processing frame 487 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.85it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.44s

--- Processing frame 488 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.78it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.29s

--- Processing frame 489 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.22it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.32s

--- Processing frame 490 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.18it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.39s

--- Processing frame 491 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.92it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.39s

--- Processing frame 492 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 15.02it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.41s

--- Processing frame 493 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 15.01it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.40s

--- Processing frame 494 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.95it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.35s

--- Processing frame 495 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.98it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Analysis with Grounding Dino




['object', 'object'] tensor([0.6417, 0.4460])
Frame processed in 1.85s

--- Processing frame 496 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.06it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.42s

--- Processing frame 497 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.64it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.32s

--- Processing frame 498 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.67it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.32s

--- Processing frame 499 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.65it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.36s

--- Processing frame 500 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.25it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.35s

--- Processing frame 501 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.03it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.43s

--- Processing frame 502 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.13it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.97s

--- Processing frame 503 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.58it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.30s

--- Processing frame 504 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.35it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.35s

--- Processing frame 505 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.50it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.37s

--- Processing frame 506 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.97it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.31s

--- Processing frame 507 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.21it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.34s

--- Processing frame 508 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.46it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.28s

--- Processing frame 509 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.81it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.28s

--- Processing frame 510 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.63it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Analysis with Grounding Dino




['object', 'object'] tensor([0.4694, 0.6929])
Frame processed in 1.79s

--- Processing frame 511 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.95it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.36s

--- Processing frame 512 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.24it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.38s

--- Processing frame 513 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.61it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.37s

--- Processing frame 514 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.28it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.35s

--- Processing frame 515 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.49it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.42s

--- Processing frame 516 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.35it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.32s

--- Processing frame 517 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 18.83it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.31s

--- Processing frame 518 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.02it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.37s

--- Processing frame 519 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.82it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.35s

--- Processing frame 520 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 16.43it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.44s

--- Processing frame 521 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.68it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.42s

--- Processing frame 522 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 18.74it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.32s

--- Processing frame 523 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.01it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.34s

--- Processing frame 524 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.68it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.34s

--- Processing frame 525 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.64it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Analysis with Grounding Dino




['object', 'object'] tensor([0.6878, 0.4025])
Frame processed in 1.82s

--- Processing frame 526 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.60it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.45s

--- Processing frame 527 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.72it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.35s

--- Processing frame 528 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.07it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.39s

--- Processing frame 529 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.69it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.43s

--- Processing frame 530 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.87it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.35s

--- Processing frame 531 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.43it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.34s

--- Processing frame 532 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.29it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.36s

--- Processing frame 533 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.01it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.40s

--- Processing frame 534 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.58it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.30s

--- Processing frame 535 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.39it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.41s

--- Processing frame 536 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.85it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.37s

--- Processing frame 537 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 25.96it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.33s

--- Processing frame 538 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.44it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.41s

--- Processing frame 539 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.60it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.35s

--- Processing frame 540 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.65it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Analysis with Grounding Dino




['object', 'object'] tensor([0.6768, 0.3151])
Frame processed in 1.91s

--- Processing frame 541 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.57it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.41s

--- Processing frame 542 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 18.81it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.39s

--- Processing frame 543 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.61it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.44s

--- Processing frame 544 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.70it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.39s

--- Processing frame 545 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.18it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.37s

--- Processing frame 546 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.05it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.40s

--- Processing frame 547 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.81it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.34s

--- Processing frame 548 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.26it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.40s

--- Processing frame 549 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.66it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.40s

--- Processing frame 550 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.92it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.41s

--- Processing frame 551 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.59it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.37s

--- Processing frame 552 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.32it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.41s

--- Processing frame 553 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 18.44it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.40s

--- Processing frame 554 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.65it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.41s

--- Processing frame 555 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.41it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Analysis with Grounding Dino




['object', 'object'] tensor([0.7171, 0.3275])
Frame processed in 1.86s

--- Processing frame 556 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 25.29it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.38s

--- Processing frame 557 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.14it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.46s

--- Processing frame 558 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.20it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.42s

--- Processing frame 559 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 14.68it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.43s

--- Processing frame 560 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 16.21it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.45s

--- Processing frame 561 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.35it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.44s

--- Processing frame 562 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.39it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.37s

--- Processing frame 563 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.61it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.36s

--- Processing frame 564 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.36it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.50s

--- Processing frame 565 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.62it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.35s

--- Processing frame 566 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.63it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.46s

--- Processing frame 567 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.31it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.34s

--- Processing frame 568 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.33it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.48s

--- Processing frame 569 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.90it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.46s

--- Processing frame 570 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.63it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Analysis with Grounding Dino




['object', 'object'] tensor([0.6887, 0.2617])
Frame processed in 1.92s

--- Processing frame 571 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.95it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.53s

--- Processing frame 572 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 18.13it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.46s

--- Processing frame 573 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.51it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.49s

--- Processing frame 574 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.53it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.40s

--- Processing frame 575 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.64it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.41s

--- Processing frame 576 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.34it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.42s

--- Processing frame 577 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 26.09it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.42s

--- Processing frame 578 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.79it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.41s

--- Processing frame 579 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.05it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.46s

--- Processing frame 580 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 15.57it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.48s

--- Processing frame 581 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.05it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.35s

--- Processing frame 582 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.69it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.37s

--- Processing frame 583 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.33it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.39s

--- Processing frame 584 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.55it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.41s

--- Processing frame 585 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.17it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Analysis with Grounding Dino




['object', 'object', 'object', 'object'] tensor([0.5028, 0.3842, 0.3643, 0.3531])
New object 6 detected at frame 585


propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 2.01s

--- Processing frame 586 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.04it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.49s

--- Processing frame 587 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.59it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.43s

--- Processing frame 588 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.89it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.43s

--- Processing frame 589 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.22it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.44s

--- Processing frame 590 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.07it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.48s

--- Processing frame 591 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 25.71it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.45s

--- Processing frame 592 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.81it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.52s

--- Processing frame 593 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.64it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.54s

--- Processing frame 594 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.34it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.48s

--- Processing frame 595 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.39it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.48s

--- Processing frame 596 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.10it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.51s

--- Processing frame 597 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.99it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.45s

--- Processing frame 598 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.43it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.50s

--- Processing frame 599 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.57it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.45s

--- Processing frame 600 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.34it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Analysis with Grounding Dino




['object', 'object', 'object'] tensor([0.5951, 0.3881, 0.3119])
Frame processed in 1.92s

--- Processing frame 601 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.70it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.45s

--- Processing frame 602 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.55it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.57s

--- Processing frame 603 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.04it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.56s

--- Processing frame 604 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 17.44it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.55s

--- Processing frame 605 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 25.35it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.48s

--- Processing frame 606 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.81it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.48s

--- Processing frame 607 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.18it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.45s

--- Processing frame 608 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.51it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.48s

--- Processing frame 609 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.51it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.48s

--- Processing frame 610 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.20it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.47s

--- Processing frame 611 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.35it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.50s

--- Processing frame 612 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 25.27it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.44s

--- Processing frame 613 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 25.03it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.44s

--- Processing frame 614 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.10it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.45s

--- Processing frame 615 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.44it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Analysis with Grounding Dino




['object', 'object', 'object', 'object', 'object', 'object'] tensor([0.4379, 0.2238, 0.2423, 0.2383, 0.3003, 0.2968])
Frame processed in 1.93s

--- Processing frame 616 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.87it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.51s

--- Processing frame 617 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.71it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.48s

--- Processing frame 618 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.63it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.49s

--- Processing frame 619 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.60it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.59s

--- Processing frame 620 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.14it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.54s

--- Processing frame 621 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.16it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.50s

--- Processing frame 622 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.53it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.54s

--- Processing frame 623 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.43it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.53s

--- Processing frame 624 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.63it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.50s

--- Processing frame 625 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 18.10it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.59s

--- Processing frame 626 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 25.51it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.46s

--- Processing frame 627 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 25.47it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.51s

--- Processing frame 628 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.06it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.47s

--- Processing frame 629 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.87it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.50s

--- Processing frame 630 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.89it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Analysis with Grounding Dino




['object', 'object', 'object', 'object'] tensor([0.4156, 0.3720, 0.2434, 0.2374])
Frame processed in 1.96s

--- Processing frame 631 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.83it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.56s

--- Processing frame 632 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.20it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.58s

--- Processing frame 633 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 14.88it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.57s

--- Processing frame 634 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.00it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.48s

--- Processing frame 635 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.72it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.50s

--- Processing frame 636 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.19it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.48s

--- Processing frame 637 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 25.73it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.49s

--- Processing frame 638 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.39it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.48s

--- Processing frame 639 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.97it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.49s

--- Processing frame 640 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.60it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.48s

--- Processing frame 641 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.16it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.49s

--- Processing frame 642 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.77it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.54s

--- Processing frame 643 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.86it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.54s

--- Processing frame 644 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.59it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.47s

--- Processing frame 645 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.17it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Analysis with Grounding Dino




['object', 'object', 'object', 'object', 'object'] tensor([0.4557, 0.2800, 0.2865, 0.2978, 0.2303])
Frame processed in 1.97s

--- Processing frame 646 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.57it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.50s

--- Processing frame 647 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.61it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.63s

--- Processing frame 648 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.39it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.62s

--- Processing frame 649 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.42it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.51s

--- Processing frame 650 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.11it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.49s

--- Processing frame 651 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.74it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.49s

--- Processing frame 652 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.55it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.49s

--- Processing frame 653 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.99it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.50s

--- Processing frame 654 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.79it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.48s

--- Processing frame 655 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.98it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.51s

--- Processing frame 656 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.03it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.54s

--- Processing frame 657 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.71it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.46s

--- Processing frame 658 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.70it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.54s

--- Processing frame 659 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.35it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.52s

--- Processing frame 660 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.14it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Analysis with Grounding Dino




['object', 'object', 'object'] tensor([0.6187, 0.4430, 0.2289])
Frame processed in 1.98s

--- Processing frame 661 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.25it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.47s

--- Processing frame 662 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.94it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.47s

--- Processing frame 663 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 16.27it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.61s

--- Processing frame 664 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.04it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.59s

--- Processing frame 665 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 25.80it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.48s

--- Processing frame 666 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.94it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.51s

--- Processing frame 667 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.94it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.51s

--- Processing frame 668 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.54it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.47s

--- Processing frame 669 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.17it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.49s

--- Processing frame 670 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 25.24it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.47s

--- Processing frame 671 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.95it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.50s

--- Processing frame 672 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.80it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.50s

--- Processing frame 673 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.89it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.56s

--- Processing frame 674 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 17.70it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.54s

--- Processing frame 675 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.71it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Analysis with Grounding Dino




['object', 'object', 'object'] tensor([0.5532, 0.4085, 0.2599])
Frame processed in 2.01s

--- Processing frame 676 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 15.15it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.66s

--- Processing frame 677 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.86it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.61s

--- Processing frame 678 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.13it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.51s

--- Processing frame 679 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 16.26it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.57s

--- Processing frame 680 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.55it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.54s

--- Processing frame 681 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.15it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.52s

--- Processing frame 682 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.86it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.53s

--- Processing frame 683 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.48it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.53s

--- Processing frame 684 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.82it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.50s

--- Processing frame 685 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.17it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.54s

--- Processing frame 686 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.36it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.56s

--- Processing frame 687 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.87it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.56s

--- Processing frame 688 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.20it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.63s

--- Processing frame 689 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.40it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.61s

--- Processing frame 690 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.13it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Analysis with Grounding Dino




['object', 'object', 'object', 'object', 'object'] tensor([0.5159, 0.3349, 0.2520, 0.2444, 0.2648])
Frame processed in 2.01s

--- Processing frame 691 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 25.09it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.59s

--- Processing frame 692 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.54it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.52s

--- Processing frame 693 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.94it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.49s

--- Processing frame 694 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.52it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.50s

--- Processing frame 695 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.12it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.52s

--- Processing frame 696 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.33it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.53s

--- Processing frame 697 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.58it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.53s

--- Processing frame 698 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.98it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.56s

--- Processing frame 699 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.46it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.55s

--- Processing frame 700 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 14.08it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.59s

--- Processing frame 701 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.61it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.55s

--- Processing frame 702 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.66it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.66s

--- Processing frame 703 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.80it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.51s

--- Processing frame 704 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.65it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.57s

--- Processing frame 705 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.28it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Analysis with Grounding Dino




['object', 'object', 'object', 'object'] tensor([0.5599, 0.3841, 0.2842, 0.2351])
Frame processed in 2.19s

--- Processing frame 706 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.09it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.72s

--- Processing frame 707 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.96it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.56s

--- Processing frame 708 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.56it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.53s

--- Processing frame 709 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.24it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.56s

--- Processing frame 710 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.68it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.56s

--- Processing frame 711 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.35it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.52s

--- Processing frame 712 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.20it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.56s

--- Processing frame 713 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.32it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.56s

--- Processing frame 714 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.49it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.53s

--- Processing frame 715 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.60it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.54s

--- Processing frame 716 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 15.27it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.72s

--- Processing frame 717 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 14.75it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.70s

--- Processing frame 718 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.29it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.65s

--- Processing frame 719 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.05it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.63s

--- Processing frame 720 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.45it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Analysis with Grounding Dino




['object', 'object', 'object', 'object', 'object'] tensor([0.4471, 0.3059, 0.3125, 0.3328, 0.2241])
New object 7 detected at frame 720


propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 2.19s

--- Processing frame 721 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 14.49it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.78s

--- Processing frame 722 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.38it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.72s

--- Processing frame 723 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.64it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.75s

--- Processing frame 724 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.37it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.87s

--- Processing frame 725 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 25.56it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.78s

--- Processing frame 726 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 23.82it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.74s

--- Processing frame 727 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 12.88it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.77s

--- Processing frame 728 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 18.46it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.89s

--- Processing frame 729 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.18it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.79s

--- Processing frame 730 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.91it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.82s

--- Processing frame 731 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.11it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.83s

--- Processing frame 732 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.54it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.73s

--- Processing frame 733 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.47it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.71s

--- Processing frame 734 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.00it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.83s

--- Processing frame 735 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 14.78it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Analysis with Grounding Dino




['object', 'object'] tensor([0.5683, 0.3827])
Frame processed in 2.42s

--- Processing frame 736 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.98it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.88s

--- Processing frame 737 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.71it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.68s

--- Processing frame 738 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.02it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.78s

--- Processing frame 739 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.72it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.70s

--- Processing frame 740 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.11it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.67s

--- Processing frame 741 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.24it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.84s

--- Processing frame 742 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.70it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.79s

--- Processing frame 743 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 21.23it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.72s

--- Processing frame 744 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.77it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.80s

--- Processing frame 745 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.21it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.78s

--- Processing frame 746 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 14.87it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.91s

--- Processing frame 747 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 20.31it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.82s

--- Processing frame 748 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.06it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.72s

--- Processing frame 749 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 22.33it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.83s

--- Processing frame 750 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 19.68it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Analysis with Grounding Dino




['object', 'object', 'object'] tensor([0.5357, 0.4033, 0.2727])
Frame processed in 2.67s

--- Processing frame 751 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 24.72it/s]
propagate in video:   0%|          | 0/1 [00:00<?, ?it/s]


Frame processed in 1.97s

--- Processing frame 752 ---


frame loading (JPEG): 100%|██████████| 1/1 [00:00<00:00, 25.92it/s]


Exiting gracefully...
Processing completed or interrupted after 752 frames


# TEST
Questa versione processa i frame in tre thread: uno legge i frame dal video, uno esegue l'inferenza e uno salva le visualizzazioni. Ci sono ancora bug e può bloccarsi!

In [None]:
import os
import torch
import numpy as np
import cv2
import matplotlib.pyplot as plt
import gc
import time
import threading
import queue
from concurrent.futures import ThreadPoolExecutor

# ---------------------------------------------------------------------------
# Pipeline configuration and shared state.
# Every name assigned here is a module-level global that the worker functions
# defined further down in this cell read (and, for the tracking state, write).
# ---------------------------------------------------------------------------

# Where the input video lives and where the annotated frames are written
video_path = "test_video/test_video_san_donato.mp4"
segmented_video_dir = "test_video/output"
os.makedirs(segmented_video_dir, exist_ok=True)

# Scratch directory (relative to the working directory) holding the JPEG of
# the frame currently handed to the models
temp_dir = "temp_frames"
os.makedirs(temp_dir, exist_ok=True)

# Open the input and fail fast when it cannot be read
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise Exception(f"Could not open video stream: {video_path}")

# Report the basic stream properties
fps = cap.get(cv2.CAP_PROP_FPS)
width, height = (
    int(cap.get(prop))
    for prop in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT)
)
print(f"Video stream at {fps} FPS, resolution: {width}x{height}")

# Bounded hand-off queues between the stages: the small capacity makes a fast
# producer block instead of buffering the whole video in memory
frame_queue = queue.Queue(maxsize=2)   # reader    -> processor
result_queue = queue.Queue(maxsize=2)  # processor -> writer

# Serialises the model-inference section of the processing stage
gpu_lock = threading.Lock()

# Tracking state shared across frames
ann_obj_id = 1       # id given to the railway; incremented for every further object
last_masks = dict()  # obj_id -> boolean mask from the most recently processed frame
railway_box = None   # best-scoring 'railway' box found on the first frame
frame_idx = 0        # number of frames read from the capture so far

# Function to read frames in a separate thread
def read_frames():
    """Producer stage: read frames from ``cap`` and push them onto ``frame_queue``.

    For every decoded frame a dict with the keys ``'index'``, ``'frame'`` (BGR
    as returned by ``cap.read()``), ``'frame_rgb'`` and ``'frame_path'`` is
    queued, and the global ``frame_idx`` is incremented.

    A ``None`` sentinel is queued when the stream ends *and also* when this
    function fails, so the consumer blocked on ``frame_queue.get()`` always
    wakes up instead of waiting forever for data that will never come.

    Raises:
        IOError: if the frame JPEG cannot be written to ``temp_dir``.
    """
    global frame_idx

    # NOTE: the same file is overwritten on every iteration, so on disk it
    # only ever reflects the most recently read frame. A consumer that needs
    # the exact pixels of a queued frame must use the in-memory arrays.
    frame_path = os.path.join(temp_dir, "000000.jpg")

    try:
        while True:
            success, frame = cap.read()
            if not success:
                # End of stream (or read error reported by OpenCV)
                break

            # Convert frame to RGB for visualization
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Save current frame to temp directory; imwrite signals failure
            # through its return value, which must not be ignored.
            if not cv2.imwrite(frame_path, frame):
                raise IOError(f"Could not write frame {frame_idx} to {frame_path}")

            # Put frame data in queue (blocks while the queue is full)
            frame_data = {
                'index': frame_idx,
                'frame': frame,
                'frame_rgb': frame_rgb,
                'frame_path': frame_path
            }
            frame_queue.put(frame_data)
            frame_idx += 1
    finally:
        # Always signal end of stream, even when an exception is propagating
        frame_queue.put(None)

def _mask_to_2d(mask):
    """Return ``mask`` as an array reduced towards 2-D.

    Singleton axes are squeezed away; if more than two axes remain, the first
    slice along the leading axis is taken.
    """
    mask_array = np.asarray(mask)
    if mask_array.ndim > 2:
        mask_array = mask_array.squeeze()
        if mask_array.ndim > 2:
            mask_array = mask_array[0]
    return mask_array


def _discard_queued_frames(idle_timeout=1.0):
    """Drop pending items from ``frame_queue`` so a blocked producer can finish.

    Returns once the ``None`` end-of-stream sentinel has been consumed, or when
    nothing arrives for ``idle_timeout`` seconds (the producer has stopped).
    """
    while True:
        try:
            if frame_queue.get(timeout=idle_timeout) is None:
                return
        except queue.Empty:
            return


# Function to process frames using GPU in a separate thread
def process_frames():
    """Processing stage: segment every queued frame and forward the masks.

    Frames are taken from ``frame_queue`` until the ``None`` sentinel arrives.
    For each frame a fresh predictor state is created, the tracked objects are
    (re-)prompted, the masks are propagated, and a dict with the keys
    ``'index'``, ``'frame_rgb'`` and ``'masks'`` is put on ``result_queue``.

    * Frame 0: Grounding DINO is queried for 'railway' and 'object'; the
      best-scoring railway box and the centre of every object box become the
      initial prompts.
    * Later frames: every object in ``last_masks`` is re-prompted (railway by
      its original box, other objects by the centre of mass of their last mask).
    * Every 15th frame: Grounding DINO is queried again and detections whose
      centre is not covered by an existing mask are added as new objects.

    Updates the globals ``ann_obj_id``, ``last_masks`` and ``railway_box``.

    The ``None`` sentinel is always forwarded to ``result_queue``, also when
    this function fails, so the downstream stage is never left waiting. On
    failure the remaining input is discarded so the producer is not left
    blocked on a full queue.

    Raises:
        IOError: if the frame snapshot handed to the models cannot be written.
    """
    global ann_obj_id, last_masks, railway_box

    # Local imports keep this definition usable on its own within the cell
    import shutil
    import tempfile

    # The models read the frame from disk. The path carried in
    # frame_data['frame_path'] is shared by all frames and may already hold a
    # later frame by the time an item is dequeued, so the in-memory pixels are
    # re-encoded into a directory owned exclusively by this stage.
    work_dir = tempfile.mkdtemp(prefix="sam2_frame_")
    snapshot_path = os.path.join(work_dir, "000000.jpg")
    finished_cleanly = False

    try:
        while True:
            # Get next frame from queue
            frame_data = frame_queue.get()

            # Check for end of stream
            if frame_data is None:
                break

            idx = frame_data['index']
            frame = frame_data['frame']
            frame_rgb = frame_data['frame_rgb']

            print(f"\n--- Processing frame {idx} ---")
            start_time = time.time()

            if not cv2.imwrite(snapshot_path, frame):
                raise IOError(f"Could not write frame {idx} to {snapshot_path}")

            # Acquire GPU lock for model inference
            with gpu_lock:
                # Clear CUDA memory before processing
                torch.cuda.empty_cache()
                gc.collect()

                # Initialize inference state on the single-frame directory
                inference_state = video_predictor.init_state(video_path=work_dir)

                # Process based on frame index
                if idx == 0:
                    # First frame: detect railway and objects
                    dino_boxes, phrases, dino_scores = utility.grounding_Dino_analyzer_plt(
                        snapshot_path, model, 'railway . object .', device
                    )

                    # Find railway box and object points
                    max_score_railway = 0
                    object_points = []

                    for i, phrase in enumerate(phrases):
                        if phrase == 'railway' and dino_scores[i] > max_score_railway:
                            railway_box = dino_boxes[i]
                            max_score_railway = dino_scores[i]
                        elif phrase == 'object':
                            x_min, y_min, x_max, y_max = dino_boxes[i]
                            object_points.append([(x_min + x_max) // 2, (y_min + y_max) // 2])

                    # Add railway to tracking
                    if railway_box is not None:
                        _, out_obj_ids, out_mask_logits = video_predictor.add_new_points_or_box(
                            inference_state=inference_state,
                            frame_idx=0,
                            obj_id=ann_obj_id,
                            box=railway_box,
                        )

                        last_masks[ann_obj_id] = (out_mask_logits[0] > 0).cpu().numpy()

                    # Add detected objects to tracking
                    for obj_point in object_points:
                        ann_obj_id += 1
                        _, out_obj_ids, out_mask_logits = video_predictor.add_new_points_or_box(
                            inference_state=inference_state,
                            frame_idx=0,
                            obj_id=ann_obj_id,
                            points=[obj_point],
                            labels=np.array([1], np.int32),
                        )

                        if ann_obj_id in out_obj_ids:
                            # Position of this object's logits in the output.
                            # Kept in its own name: reusing `idx` here would
                            # clobber the frame index used below for logging,
                            # the periodic check and the output file name.
                            mask_pos = list(out_obj_ids).index(ann_obj_id)
                            last_masks[ann_obj_id] = (out_mask_logits[mask_pos] > 0).cpu().numpy()
                else:
                    # For non-first frames, transfer objects from previous frame
                    for obj_id, mask in last_masks.items():
                        mask_array = _mask_to_2d(mask)

                        # Find non-zero coordinates
                        y_indices, x_indices = np.where(mask_array > 0)

                        # Objects whose last mask is empty are not re-prompted
                        if len(y_indices) > 0:
                            # Calculate center of mass
                            center_y = int(np.mean(y_indices))
                            center_x = int(np.mean(x_indices))

                            # Add object to current frame
                            if obj_id == 1 and railway_box is not None:
                                video_predictor.add_new_points_or_box(
                                    inference_state=inference_state,
                                    frame_idx=0,
                                    obj_id=obj_id,
                                    box=railway_box,
                                )
                            else:
                                video_predictor.add_new_points_or_box(
                                    inference_state=inference_state,
                                    frame_idx=0,
                                    obj_id=obj_id,
                                    points=[[center_x, center_y]],
                                    labels=np.array([1], np.int32),
                                )

                # Propagate masks in current frame
                result = next(video_predictor.propagate_in_video(
                    inference_state,
                    start_frame_idx=0
                ))
                _, out_obj_ids, out_mask_logits = result

                # Update masks for next frame
                frame_masks = {}
                for mask_pos, obj_id in enumerate(out_obj_ids):
                    frame_masks[obj_id] = (out_mask_logits[mask_pos] > 0).cpu().numpy()
                last_masks = frame_masks

                # Check for new objects periodically
                if idx % 15 == 0 and idx > 0:
                    dino_boxes, phrases, _ = utility.grounding_Dino_analyzer_plt(
                        snapshot_path, model, 'object .', device
                    )

                    # Check each detected object
                    for i, phrase in enumerate(phrases):
                        if phrase == 'object':
                            x_min, y_min, x_max, y_max = dino_boxes[i]
                            center_x = (x_min + x_max) // 2
                            center_y = (y_min + y_max) // 2

                            # Already tracked if the box centre falls inside
                            # any current mask
                            already_tracked = False
                            for mask in last_masks.values():
                                mask_array = _mask_to_2d(mask)

                                if (0 <= int(center_y) < mask_array.shape[0] and
                                    0 <= int(center_x) < mask_array.shape[1] and
                                    mask_array[int(center_y), int(center_x)]):
                                    already_tracked = True
                                    break

                            # Add new object if needed
                            if not already_tracked:
                                ann_obj_id += 1
                                print(f"New object {ann_obj_id} detected at frame {idx}")

                                video_predictor.add_new_points_or_box(
                                    inference_state=inference_state,
                                    frame_idx=0,
                                    obj_id=ann_obj_id,
                                    points=[[center_x, center_y]],
                                    labels=np.array([1], np.int32),
                                )

                                # Re-propagate with new object
                                result = next(video_predictor.propagate_in_video(
                                    inference_state,
                                    start_frame_idx=0
                                ))
                                _, out_obj_ids, out_mask_logits = result

                                # Update masks
                                for mask_pos, tracked_id in enumerate(out_obj_ids):
                                    last_masks[tracked_id] = (out_mask_logits[mask_pos] > 0).cpu().numpy()

            # Calculate processing time
            processing_time = time.time() - start_time
            print(f"Frame {idx} processed in {processing_time:.2f}s")

            # Put results in output queue
            result_data = {
                'index': idx,
                'frame_rgb': frame_rgb,
                'masks': dict(last_masks)  # Make a copy of the masks dict
            }
            result_queue.put(result_data)

            # Clean up
            del inference_state
            gc.collect()

        finished_cleanly = True
    finally:
        # Always tell the downstream stage that no more results will come
        result_queue.put(None)
        if not finished_cleanly:
            # Let a producer blocked on a full frame_queue run to completion
            _discard_queued_frames()
        shutil.rmtree(work_dir, ignore_errors=True)

# Function to save visualization results in a separate thread
def save_results():
    """Writer stage: render every processed frame with its masks and save it.

    Consumes dicts from ``result_queue`` (keys ``'index'``, ``'frame_rgb'`` and
    ``'masks'``) until the ``None`` sentinel arrives. Each frame is drawn with
    its masks overlaid and written to ``segmented_video_dir`` as
    ``frame_<index, zero-padded to 6 digits>.jpg``.
    """
    # iter(callable, sentinel) keeps calling result_queue.get() and stops as
    # soon as it returns None, i.e. at the end-of-processing marker.
    for result in iter(result_queue.get, None):
        frame_number = result['index']
        image_rgb = result['frame_rgb']
        overlay_masks = result['masks']

        # Draw the frame and overlay one mask per tracked object
        plt.figure(figsize=(8, 6))
        plt.imshow(image_rgb)
        for obj_id, mask in overlay_masks.items():
            utility.show_mask_v(mask, plt.gca(), obj_id=obj_id)

        output_file = os.path.join(segmented_video_dir, f"frame_{frame_number:06d}.jpg")
        plt.savefig(output_file)
        # Close the figure so figures do not accumulate across frames
        plt.close()

        print(f"Frame {frame_number} visualization saved")

# Local imports: names this driver needs beyond the cell's import block
import shutil
from concurrent.futures import FIRST_EXCEPTION, wait as wait_for_futures


def _release_blocked_stages(pending, poll_interval=0.05):
    """Unblock worker stages that are still running after a failure or interrupt.

    The stages only communicate through the two bounded queues, so a stage can
    be stuck either on ``put()`` (its consumer is gone) or on ``get()`` (its
    producer is gone). Until every future in ``pending`` has finished, this
    repeatedly empties both queues and injects the ``None`` end-of-stream
    sentinel. Queued work is discarded, which is acceptable because the run is
    being abandoned.
    """
    while pending:
        for stage_queue in (frame_queue, result_queue):
            # Make room for a producer blocked on put()
            try:
                while True:
                    stage_queue.get_nowait()
            except queue.Empty:
                pass
            # Wake a consumer blocked on get()
            try:
                stage_queue.put_nowait(None)
            except queue.Full:
                pass
        _, pending = wait_for_futures(pending, timeout=poll_interval)


try:
    # Start threads for pipelined processing
    with ThreadPoolExecutor(max_workers=3) as executor:
        reader_future = executor.submit(read_frames)
        processor_future = executor.submit(process_frames)
        writer_future = executor.submit(save_results)
        stage_futures = [reader_future, processor_future, writer_future]

        try:
            # Return as soon as any stage fails, or when all of them finish.
            # Waiting on the futures one after another would leave this thread
            # stuck on the reader while a later stage had already died.
            _, unfinished = wait_for_futures(stage_futures, return_when=FIRST_EXCEPTION)
        except KeyboardInterrupt:
            # Leaving the `with` block joins the workers, so they must be
            # told to stop first or the interrupt would have no effect.
            _release_blocked_stages([f for f in stage_futures if not f.done()])
            raise

        # No-op on success; after a failure it lets the survivors terminate
        _release_blocked_stages(unfinished)

        # Re-raise the first worker exception, if any
        for stage_future in stage_futures:
            stage_future.result()

except (KeyboardInterrupt, SystemExit):
    print("Exiting gracefully...")
finally:
    # Release resources
    cap.release()

    # Clean up temp directory. Best effort: a cleanup problem (directory
    # already removed, unexpected sub-directory, ...) must not replace the
    # exception that is being propagated.
    shutil.rmtree(temp_dir, ignore_errors=True)

    print(f"Processing completed or interrupted after {frame_idx} frame")