# Diagram

![!\[Alt text\](objectDetect.jpg)](objectDetect.jpg)

## 1. Insert Library

In [1]:
import cv2
import numpy as np
from ultralytics import YOLO
import math

#### setup frame reader

In [2]:
cap = cv2.VideoCapture("./input/y cross.mp4")

ret, frame = cap.read()
while True:
    cv2.imshow("Frame", frame)
    key = cv2.waitKey(30) & 0xFF
    if key == ord("q"):
        cv2.destroyAllWindows()
        break

print(frame.shape)

(720, 1280, 3)


#### Add Frame Writer

In [3]:
writer = cv2.VideoWriter("./output/yoloV8ForExample01.mp4", cv2.VideoWriter_fourcc(*'MP4V'), cap.get(cv2.CAP_PROP_FPS),
                          (frame.shape[1], frame.shape[0]))

#### Add Pretrain Model

In [4]:
model = YOLO("yolov8n.pt")

Downloading https:\github.com\ultralytics\assets\releases\download\v0.0.0\yolov8n.pt to yolov8n.pt...
100%|██████████| 6.23M/6.23M [00:00<00:00, 29.2MB/s]


## Try Detection

In [5]:
results = model(frame)


0: 384x640 37 persons, 2 birds, 155.4ms
Speed: 9.0ms preprocess, 155.4ms inference, 21.7ms postprocess per image at shape (1, 3, 384, 640)


In [6]:
frame

array([[[129, 129, 129],
        [129, 129, 129],
        [129, 129, 129],
        ...,
        [146, 149, 147],
        [147, 150, 148],
        [147, 150, 148]],

       [[129, 129, 129],
        [129, 129, 129],
        [129, 129, 129],
        ...,
        [146, 149, 147],
        [147, 150, 148],
        [147, 150, 148]],

       [[129, 129, 129],
        [129, 129, 129],
        [129, 129, 129],
        ...,
        [147, 150, 148],
        [148, 151, 149],
        [148, 151, 149]],

       ...,

       [[197, 200, 198],
        [198, 201, 199],
        [200, 203, 201],
        ...,
        [229, 229, 229],
        [229, 229, 229],
        [229, 229, 229]],

       [[198, 201, 199],
        [199, 202, 200],
        [201, 204, 202],
        ...,
        [229, 229, 229],
        [229, 229, 229],
        [229, 229, 229]],

       [[198, 201, 199],
        [199, 202, 200],
        [201, 204, 202],
        ...,
        [229, 229, 229],
        [229, 229, 229],
        [229, 229, 229]]

In [7]:
print(results)
for result in results:
    print(result)
    #.boxes.data.tolist()
    

[ultralytics.engine.results.Results object with attributes:

boxes: ultralytics.engine.results.Boxes object
keypoints: None
keys: ['boxes']
masks: None
names: {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58

In [8]:
print(results[0].boxes.data)

tensor([[9.7388e+02, 3.1310e+02, 1.0291e+03, 4.2696e+02, 8.1975e-01, 0.0000e+00],
        [9.6391e+02, 2.1221e+02, 1.0104e+03, 3.2209e+02, 7.8149e-01, 0.0000e+00],
        [4.4293e+02, 4.3186e+02, 4.9675e+02, 5.6774e+02, 7.7140e-01, 0.0000e+00],
        [1.7298e+02, 2.9066e+02, 2.1746e+02, 4.0754e+02, 7.6594e-01, 0.0000e+00],
        [7.5929e+02, 6.3308e+02, 8.2943e+02, 7.2000e+02, 7.5600e-01, 0.0000e+00],
        [4.9984e+02, 3.0061e+02, 5.4897e+02, 4.1535e+02, 7.4867e-01, 0.0000e+00],
        [1.1959e+03, 9.5811e+01, 1.2334e+03, 1.8230e+02, 7.2868e-01, 0.0000e+00],
        [7.6002e+02, 3.3732e+02, 8.0871e+02, 4.3652e+02, 6.9943e-01, 1.4000e+01],
        [8.0078e+02, 8.4780e+01, 8.3666e+02, 1.7828e+02, 6.8147e-01, 0.0000e+00],
        [4.7252e+02, 1.6082e+02, 5.0974e+02, 2.5713e+02, 6.7029e-01, 0.0000e+00],
        [5.9470e+02, 6.2032e+01, 6.2104e+02, 1.3984e+02, 6.3556e-01, 0.0000e+00],
        [7.2738e+02, 3.0421e+01, 7.5520e+02, 9.8382e+01, 6.3270e-01, 0.0000e+00],
        [9.4520e

[x1, y1, x2, y2, confidance, class_id]

In [9]:
while True:
    for result in results:
        for r in result.boxes.data.tolist():
            startX, startY, endX, endY, score, class_id = r
            trackx=(startX+ endX)//2
            tracky=(startY+ endY)//2
            # cv2.rectangle(frame, (int(startX),int(startY)), (int(endX), int(endY)), (255, 255, 255), 2)
            # cv2.putText(frame, result.names[int(class_id)], (int(trackx), int(tracky)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
            cv2.circle(frame,  (int(trackx), int(tracky)), 1,(0,0,255), 3)
    cv2.imshow("Frame", frame)
    key = cv2.waitKey(30) & 0xFF
    if key == ord("q"):
        cv2.destroyAllWindows()
        break

# vedio cap

## ready

In [10]:
# Reference
# setting of the whole thing
confidance_threshold=0.3
linescale=50
maxDisappeared=10
maxDistance=100
lineX=False



# variable
Framesnumber = 0
totalDown = 0
totalUp = 0
objects = dict()     		# objects[objectID] = [(x,y),disappear_number,Is_going_down,position]
trackableObjects = dict()	# trackableObjects[objectID] = [(x,y),disappear_number,Is_going_down,position]
height, width = frame.shape[:2]
nextid = 0



# in example, line will in middle of vedio in Y
if lineX:
    checkpoint=width*linescale//100
    position=1
else:
    checkpoint=height*linescale//100
    position=0
    

## Tracking

In [11]:
def register(centropt):
	global nextid
	# [np.array[x,y],disappear_number,Is_going_down]
	Is_going_down=True if centropt[0][1]-checkpoint > 0 else False
	# print("{} is : {}".format(nextid,["down" if Is_going_down else "UP"]))
	# print(centropt)
	centropt[2]=Is_going_down
	objects[nextid] = centropt
	nextid +=  1
	
def deregister(disappeared):
    for id in disappeared:
    	del objects[id]


def update(disappeared,rects):
	if len(rects) == 0:
		for id,rect in objects.items():
			objects[id][1]+=1
			if objects[id][1] > maxDisappeared:
				disappeared.append(id)
		return

	if len(objects) == 0:
		for rect in rects:
			register(rect)

	else:
		# rects=[[np.array[x,y],disappear_number,Is_going_down],..]

		# objects[id]=[np.array[x,y],disappear_number,Is_going_down]


		# counting distance on every point compare
		curent_used_center_point=[]
		for id,x in objects.items():

			mindist=None
			minlen=0

			for idx, rect in enumerate(rects):

				if mindist is None:
					mindist=math.dist(x[0],rect[0])
					continue
				nowdist=math.dist(x[0],rect[0])

				if nowdist<mindist:
					mindist=nowdist
					minlen=idx
			
			if mindist<maxDistance:
				objects[id][0]=rects[minlen][0]
				curent_used_center_point.append(minlen)

			# disappare count
			else:
				objects[id][1] += 1

				if objects[id][1] > maxDisappeared:
					disappeared.append(id)


		# unused rect will be register
		for idx, rect in enumerate(rects):
			if idx in curent_used_center_point:
				continue
			register(rect)
		


In [12]:
print("check point: ",checkpoint)
while ret:
    
    results = model(frame)
    
    trackers=[]
    
    this_fram_pt=[]
    need_deleted_id=[]
    if Framesnumber%(30//15)==0:
        for result in results:
            #confidance
            # [973.8834228515625, 313.10400390625, 1029.0665283203125, 426.961181640625, 0.8197469115257263, 0.0]
            for r in result.boxes.data.tolist():
                startX, startY, endX, endY, score, class_id = r
                class_id = int(class_id)
                if score > confidance_threshold and class_id==0:
                    trackx=(startX+ endX)//2
                    tracky=(startY+ endY)//2
                    cv2.rectangle(frame, (int(startX),int(startY)), (int(endX), int(endY)), (255, 255, 255), 2)
                    centropt=np.array((trackx,tracky)).astype(int)
                    this_fram_pt.append([centropt,0,False])

        disappeared=[]     
        update(disappeared,this_fram_pt)
        deregister(disappeared)




        # Counting
        for (id, centropt) in objects.items():
            if id not in trackableObjects:
                trackableObjects[id] = [centropt,False]
                # [[array([263,  56]), 0, False], False]bvb
            else:
                if not trackableObjects[id][1]:
                    distance = centropt[0][1] - checkpoint
                    
                    if  distance < -5 and centropt[2]:
                        # print("id",id)
                        totalUp += 1
                        trackableObjects[id][1]=True
                    elif (distance > 5) and (not centropt[2]):
                        # print("id",id)
                        totalDown += 1
                        trackableObjects[id][1]=True
                trackableObjects[id][0] = centropt
            text = "ID {}".format(id)
            cv2.putText(frame, text, (centropt[0][0] - 10, centropt[0][1] - 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    # set line
    if lineX:
        cv2.line(frame, (checkpoint, 0), (checkpoint, height), (0, 255, 255), 5)
    else:
        cv2.line(frame, (0, checkpoint), (width, checkpoint), (0, 255, 255), 5)
    

        
        
    info = [
        ("Up", totalUp),
        ("Down", totalDown),
    ]

    for (i, (k, v)) in enumerate(info):
        text = "{}: {}".format(k, v)
        cv2.putText(frame, text, (10, height - ((i * 20) + 20)),
            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
        
    writer.write(frame)
    
    cv2.imshow("Frame", frame)
    key = cv2.waitKey(30) & 0xFF
    if key == ord("q"):
        break
    Framesnumber += 1
    ret, frame=cap.read()

# check to see if we need to release the video writer pointer
if writer is not None:
	writer.release()

cv2.destroyAllWindows()


0: 384x640 36 persons, 68.2ms
Speed: 1.0ms preprocess, 68.2ms inference, 3.0ms postprocess per image at shape (1, 3, 384, 640)



check point:  360


0: 384x640 35 persons, 1 bird, 59.0ms
Speed: 2.0ms preprocess, 59.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 35 persons, 2 birds, 99.0ms
Speed: 2.0ms preprocess, 99.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 35 persons, 1 bird, 75.0ms
Speed: 2.0ms preprocess, 75.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 37 persons, 2 birds, 53.6ms
Speed: 1.0ms preprocess, 53.6ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 38 persons, 2 birds, 53.0ms
Speed: 3.0ms preprocess, 53.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 35 persons, 2 birds, 63.0ms
Speed: 2.0ms preprocess, 63.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 35 persons, 2 birds, 72.0ms
Speed: 2.0ms preprocess, 72.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 36 persons, 2 birds, 65.0ms
Spee

In [13]:

cap.release()