In [10]:
import tensorflow as tf
import numpy as np
import cv2
import json
import os
from PIL import Image
import time

In [11]:
# Directory of the exported SavedModel. The SSD_MODEL_DIR environment variable
# overrides the default so the notebook can run on another machine without
# editing this cell; the original local path is kept as the fallback.
model_path = os.environ.get(
    "SSD_MODEL_DIR",
    r"C:\Users\ASUS\Downloads\ssd-mobilenet-v2-tensorflow2-fpnlite-320x320-v1",
)
# Loaded once at notebook level and reused for every frame.
detection_model = tf.saved_model.load(model_path)

In [14]:
# Class-id -> label lookup used to name detections. Ids start at 1 and follow
# the order of the tuple below.
# NOTE(review): these ids do not look like the COCO numbering that stock SSD
# MobileNet checkpoints are trained with — confirm against the label map of
# the model that is actually loaded.
LABEL_MAP = dict(enumerate(
    ('person', 'car', 'tree', 'road', 'road_sign'),
    start=1,
))


In [15]:
def run_inference(model, image_np):
    """Run the model's ``serving_default`` signature on one image.

    Args:
        model: Loaded object exposing a ``signatures`` mapping that contains
            a ``'serving_default'`` callable.
        image_np: Image array for a single frame. A leading batch axis is
            added before the array is handed to the signature.

    Returns:
        dict: Every output of the signature keyed by its output name, with
        each value converted through ``.numpy()``.
    """
    # Convert first, then prepend the batch axis (image -> [1, image]).
    batched = tf.expand_dims(tf.convert_to_tensor(image_np), axis=0)

    serving_fn = model.signatures['serving_default']
    raw_outputs = serving_fn(batched)

    numpy_outputs = {}
    for name, tensor in raw_outputs.items():
        numpy_outputs[name] = tensor.numpy()
    return numpy_outputs


In [16]:
def _inset_interval(lower, upper, margin):
    """Shrink ``[lower, upper]`` by ``margin`` at each end; collapse to the midpoint instead of inverting."""
    new_lower = lower + margin
    new_upper = upper - margin
    if new_lower > new_upper:
        midpoint = (lower + upper) / 2.0
        return midpoint, midpoint
    return new_lower, new_upper


def associate_objects_and_subobjects(detection_boxes, detection_classes, detection_scores,
                                     score_threshold=0.5, label_map=None):
    """Convert raw detector outputs into object records with optional sub-objects.

    Every detection whose score is strictly greater than ``score_threshold``
    becomes a dict with keys ``"object"`` (label), ``"id"`` (1-based, numbered
    per call) and ``"bbox"`` (list of four Python floats). A detection
    labelled ``'road_sign'`` also gets a ``'road_sign_details'`` sub-object
    whose box is the parent box shrunk by 0.1 on each side; it is stored under
    the parent's ``"subobject"`` key and returned in the second list.

    Args:
        detection_boxes: Sequence of 4-element boxes. Elements 0/1 are treated
            as the lower bounds and 2/3 as the matching upper bounds
            (presumably normalized ``[ymin, xmin, ymax, xmax]`` — confirm
            against the detector's output format).
        detection_classes: Sequence of class ids, one per box; each is cast
            with ``int()`` before the label lookup.
        detection_scores: Sequence of confidence scores, one per box.
        score_threshold: Detections with a score at or below this are dropped.
        label_map: Optional ``{class_id: label}`` mapping. Defaults to the
            notebook-level ``LABEL_MAP``. Ids missing from the mapping are
            labelled ``'Unknown'``.

    Returns:
        tuple[list[dict], list[dict]]: ``(objects, sub_objects)``.
    """
    labels = LABEL_MAP if label_map is None else label_map

    objects = []
    sub_objects = []

    for raw_box, raw_class, score in zip(detection_boxes, detection_classes, detection_scores):
        if not score > score_threshold:
            continue

        object_class = labels.get(int(raw_class), 'Unknown')
        # Plain Python floats keep the record JSON-serializable even when the
        # detector returns NumPy float32 values.
        bbox = [float(coord) for coord in raw_box]

        obj = {
            "object": object_class,
            "id": len(objects) + 1,
            "bbox": bbox,
        }

        if object_class == 'road_sign':
            # Shrink each axis independently; boxes thinner than twice the
            # margin collapse to their centre instead of producing min > max.
            low0, high0 = _inset_interval(bbox[0], bbox[2], 0.1)
            low1, high1 = _inset_interval(bbox[1], bbox[3], 0.1)
            sub_obj = {
                "object": "road_sign_details",
                "id": len(sub_objects) + 1,
                "bbox": [low0, low1, high0, high1],
            }
            sub_objects.append(sub_obj)
            obj["subobject"] = sub_obj

        objects.append(obj)

    return objects, sub_objects

In [17]:
def process_video(video_path, output_path='detections.json'):
    """Run the detector on every frame of a video and write all detections to JSON.

    Each frame is resized to 320x320, converted from OpenCV's BGR channel
    order to RGB, passed through ``run_inference`` with the notebook-level
    ``detection_model``, and post-processed by
    ``associate_objects_and_subobjects``. Every resulting object record is
    appended to one flat list that is dumped to ``output_path`` at the end.
    For objects carrying a ``"subobject"``, ``save_subobject_image`` is called
    with the original (un-resized) frame.

    NOTE(review): ``save_subobject_image`` is not defined in the visible
    cells; it must exist in the kernel before this function is called.
    Sub-object ids restart at 1 for every frame, so if that helper names files
    by id alone, later frames would overwrite earlier ones — confirm.

    Args:
        video_path: Path of the video file to open with ``cv2.VideoCapture``.
        output_path: Destination of the JSON file with all detections.

    Raises:
        OSError: If the video cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        # Fail loudly instead of writing an empty detections file.
        raise OSError(f"Could not open video: {video_path}")

    detections = []
    frame_count = 0

    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        print(f"Video FPS: {fps}")

        # perf_counter is monotonic and high-resolution, unlike time.time().
        start_time = time.perf_counter()

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Resize first (cheaper), then swap BGR -> RGB for the detector.
            resized = cv2.resize(frame, (320, 320))
            image_np = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
            output = run_inference(detection_model, image_np)

            # Index 0 drops the batch axis added by run_inference.
            detection_boxes = output['detection_boxes'][0]
            detection_classes = output['detection_classes'][0]
            detection_scores = output['detection_scores'][0]

            objects, sub_objects = associate_objects_and_subobjects(
                detection_boxes, detection_classes, detection_scores
            )

            for obj in objects:
                detections.append(obj)
                attached = obj.get("subobject")
                if attached is None:
                    continue
                # "subobject" holds a single dict; iterating it directly would
                # walk its keys. Accept a list of dicts as well.
                attached_list = attached if isinstance(attached, (list, tuple)) else [attached]
                for sub_obj in attached_list:
                    subobject_image_path = save_subobject_image(frame, sub_obj["bbox"], sub_obj["id"])
                    print(f"Sub-object image saved at: {subobject_image_path}")

            frame_count += 1

        total_time = time.perf_counter() - start_time
    finally:
        # Release the capture even if inference or saving raises.
        cap.release()

    print(f"Processed {frame_count} frames in {total_time} seconds.")
    if total_time > 0:
        print(f"Frames per second: {frame_count / total_time}")

    with open(output_path, 'w') as json_file:
        # default=float converts any NumPy scalar left in a record, which the
        # json module cannot serialize on its own.
        json.dump(detections, json_file, indent=4, default=float)


In [18]:
# Input video. Set the TRAFFIC_VIDEO_PATH environment variable to run on a
# different file without editing this cell; the original local path remains
# the default.
video_path = os.environ.get(
    "TRAFFIC_VIDEO_PATH",
    r"C:\Users\ASUS\Downloads\Traffic IP Camera video - Tech Channel00001 (720p, h264).mp4",
)
process_video(video_path)

Video FPS: 25.0
Processed 750 frames in 255.54204392433167 seconds.
Frames per second: 2.9349377835534645
