In [8]:
import datetime
from ultralytics import YOLO
import cv2
from deep_sort_realtime.deepsort_tracker import DeepSort
import random

In [9]:
CONFIDENCE_THRESHOLD = 0.8

In [10]:
# initialize the video capture object
video_cap = cv2.VideoCapture("TestAnna2.mp4")

fps = int(video_cap.get(cv2.CAP_PROP_FPS))

total_frames = int(video_cap.get(cv2.CAP_PROP_FRAME_COUNT))

print(fps, total_frames)

# initialize the video writer object
writer = cv2.VideoWriter('output.mp4', 1983148141, int(video_cap.get(cv2.CAP_PROP_FPS)),
                          (int(video_cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(video_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))))

29 2079


In [11]:
# load the pre-trained YOLOv8n model
model = YOLO("yolov9c.pt")

tracker = DeepSort(max_age=15)

In [12]:
#initialization of the dictionary, which store the centers of detection boxes for trajectory and coordinates of boxes  
objects_detected = {}

k = 0

In [13]:
while k < total_frames:

    #read the frame from the video
    ret, frame = video_cap.read()

    if not ret:
        break

    # run the YOLO model on the frame
    detections = model(frame)[0]

    # initialize the list of bounding boxes and confidences
    results = []
    
    # DETECTION

    # loop over the detections
    for data in detections.boxes.data.tolist():
        
        # extract the confidence (i.e., probability) associated with the prediction
        confidence = data[4]
        
        data_class = data[5]
        
        # filter out weak detections by ensuring the 
        # confidence is greater than the minimum confidence
        if float(confidence) < CONFIDENCE_THRESHOLD:
            
            continue
        
        #do detection only cars and trucks
        if not (data_class == 2 or data_class == 7):
            
            continue
        
        # get the bounding box and the class id
        xmin, ymin, xmax, ymax = int(data[0]), int(data[1]), int(data[2]), int(data[3])
        
        class_id = int(data[5])
        
        # add the bounding box (x, y, w, h), confidence and class id, which is 2 or 7, to the results list
        results.append([[xmin, ymin, xmax - xmin, ymax - ymin], confidence, class_id])

    # TRACKING

    # update the tracker with the new detections
    tracks = tracker.update_tracks(results, frame=frame)
    
    # loop over the tracks
    for track in tracks:
        
        # if the track is not confirmed, ignore it
        if not track.is_confirmed():
            continue

        # get the track id and the bounding box
        track_id = track.track_id
        
        ltrb = track.to_ltrb()

        xmin, ymin, xmax, ymax = int(ltrb[0]), int(ltrb[1]), int(ltrb[2]), int(ltrb[3])
        
        # if it is a new object, set to it random color and put first coordinates and center of the object into the dictionary, 
        #else add coordinates of the track boxes and centers
        if track_id not in objects_detected.keys():
            
            color = (random.randint(1,256), random.randint(1,256), random.randint(1,256))
            
            objects_detected[track_id] = [color, []]

            objects_detected[track_id][1].append([])
            
            objects_detected[track_id][1][0] += [(int(xmin + (xmax - xmin)/2), int(ymin + (ymax - ymin)/2)), (xmin, ymin), (xmax, ymax)]
            
        else:
            
            objects_detected[track_id][1].append([(int(xmin + (xmax - xmin)/2), int(ymin + (ymax - ymin)/2)), (xmin, ymin), (xmax, ymax)])
         
        # draw the bounding box and the track id
        cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), objects_detected[track_id][0], 2)
        
        cv2.rectangle(frame, (xmin, ymin - 20), (xmin + 20, ymin), objects_detected[track_id][0], -1)
        
        cv2.putText(frame, str(track_id), (xmin + 5, ymin - 8),
                    
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, objects_detected[track_id][0], 2)
        
        #if there are more than 1 frame with the detected object, start draw a trajectory
        if len(objects_detected[track_id][1]) > 1:
            
            for i in range(1, len(objects_detected[track_id][1])):
                
                cv2.line(frame, objects_detected[track_id][1][i-1][0], objects_detected[track_id][1][i][0], objects_detected[track_id][0], 3)


    # show the frame to our screen
    #cv2.imshow("Frame", frame)
    
    #add the frame to the output file
    writer.write(frame)
    
    if cv2.waitKey(1) == ord("q"):
        
        break
    
    k+=1
   
video_cap.release()

writer.release()

cv2.destroyAllWindows()

#cv2.waitKey()


0: 384x640 2 cars, 583.4ms
Speed: 8.3ms preprocess, 583.4ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cars, 473.2ms
Speed: 6.3ms preprocess, 473.2ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cars, 511.0ms
Speed: 1.2ms preprocess, 511.0ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cars, 461.4ms
Speed: 1.3ms preprocess, 461.4ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cars, 519.3ms
Speed: 1.7ms preprocess, 519.3ms inference, 0.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cars, 514.9ms
Speed: 5.0ms preprocess, 514.9ms inference, 0.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cars, 471.7ms
Speed: 5.6ms preprocess, 471.7ms inference, 0.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cars, 470.2ms
Speed: 2.1ms preprocess, 470.2ms inference, 0.6ms postprocess per image at shape (1, 3, 38