In [2]:
# import libraries
import os
import time
import cv2
import numpy as np
from model.yolo_model import YOLO

In [3]:
# preprocess images
def process_image(img):
    """Turn a raw frame into the batch the network is fed.

    The frame is resized to 416x416 with bicubic interpolation, converted to
    float32, divided by 255 and given a leading batch axis of length 1.

    Args:
        img: image array accepted by ``cv2.resize``.

    Returns:
        float32 array with a new first axis of size 1 in front of the
        resized image.
    """
    resized = cv2.resize(img, (416, 416), interpolation=cv2.INTER_CUBIC)

    # np.array copies, so the caller's frame is never modified.
    normalized = np.array(resized, dtype='float32') / 255.

    return normalized[np.newaxis, ...]

In [13]:
# draw detected boxes
def draw(image, boxes, scores):
    """Draw every detection onto ``image`` in place and log it to stdout.

    For each (box, score) pair a rectangle, a 'person <score>' label and a
    dot at the box centre are drawn, then the score and the raw box are
    printed. A blank line is printed after the last detection.

    Args:
        image: array the shapes are drawn on; ``image.shape[0]`` and
            ``image.shape[1]`` bound the lower/right box edges.
        boxes: iterable of ``(x, y, w, h)`` boxes.
        scores: iterable of scores, paired with ``boxes`` by position.
    """

    def to_pixel(value):
        # Round half up to an integer pixel index.
        return int(np.floor(value + 0.5))

    for box, score in zip(boxes, scores):
        x, y, w, h = box

        # Clamp the corners so the box never leaves the image.
        x_min = max(0, to_pixel(x))
        y_min = max(0, to_pixel(y))
        x_max = min(image.shape[1], to_pixel(x + w))
        y_max = min(image.shape[0], to_pixel(y + h))

        cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (255, 0, 0), 2)

        label = '{0} {1:.2f}'.format('person', score)
        # The label sits 6 px above the upper-left corner of the box.
        cv2.putText(image, label,
                    (x_min, y_min - 6),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.6, (0, 0, 255), 1,
                    cv2.LINE_AA)

        # radius=0 with thickness=10 renders a filled dot at the box centre.
        center = (int((x_min + x_max) / 2), int((y_min + y_max) / 2))
        cv2.circle(image, center, radius=0, color=(0, 0, 255), thickness=10)

        print('class: {0}, score: {1:.2f}'.format('person', score))
        print('box coordinate x,y,w,h: {0}'.format(box))

        # TODO: crop the detection (image[y_min:y_max, x_min:x_max]) and feed
        # it to a CNN that distinguishes employees from customers.
    print()

In [26]:
# calculate distance
def distance(boxes, image_shape=None):
    """Return the pixel centre of every detected box.

    Despite its name this function does not measure distances yet; it only
    converts ``(x, y, w, h)`` boxes into centre points.

    Args:
        boxes: sequence of ``(x, y, w, h)`` boxes, or ``None``.
        image_shape: optional ``(height, width, ...)`` shape used to clamp the
            right and bottom box edges. When omitted, the shape of a
            notebook-level variable called ``image`` is used if one exists
            (the legacy behaviour); if there is none, the right/bottom edges
            are left unclamped instead of raising ``NameError``.

    Returns:
        dict mapping ``"person_<index>"`` to a ``(centerX, centerY)`` tuple of
        ints. Empty when ``boxes`` is ``None`` or has no entries.
    """
    if boxes is None:
        return {}

    if image_shape is None:
        # Legacy fallback: earlier versions silently read the global `image`.
        # Prefer passing `image_shape` explicitly so the result cannot depend
        # on whichever image a previous cell happened to leave behind.
        legacy_image = globals().get("image")
        image_shape = getattr(legacy_image, "shape", None)

    person_location = {}

    for i, (x, y, w, h) in enumerate(boxes):
        # Round half up to integer pixels and keep the box inside the image.
        x_min = max(0, int(np.floor(x + 0.5)))
        y_min = max(0, int(np.floor(y + 0.5)))
        x_max = int(np.floor(x + w + 0.5))
        y_max = int(np.floor(y + h + 0.5))
        if image_shape is not None:
            x_max = min(image_shape[1], x_max)
            y_max = min(image_shape[0], y_max)

        centerX = int((x_min + x_max) / 2)
        centerY = int((y_min + y_max) / 2)

        person_location["person" + "_" + str(i)] = centerX, centerY

    return person_location

In [None]:
# import cv2
# import numpy as np

# Load the camera matrix and distortion coefficients
# with np.load('calib.npz') as X:
    # mtx, dist = [X[i] for i in ('mtx', 'dist')]

# Define image points and corresponding real-world points
def homography(image, image_points=None, world_points=None):
    """Estimate the homography that maps image pixels to world coordinates.

    By default the four image corners are mapped onto the unit square, which
    is only a placeholder calibration: pass measured ``image_points`` /
    ``world_points`` correspondences to obtain real-world units.

    Args:
        image: array whose ``shape[:2]`` is ``(height, width)``.
        image_points: optional sequence of ``(x, y)`` pixel coordinates.
            Defaults to the four image corners.
        world_points: optional sequence of ``(X, Y)`` world coordinates, one
            per image point. Defaults to the corners of the unit square.

    Returns:
        The 3x3 homography matrix returned by ``cv2.findHomography``.

    Raises:
        ValueError: if OpenCV could not estimate a homography.
    """
    height, width = image.shape[:2]

    if image_points is None:
        # Points are (x, y): x spans the width (shape[1]) and y spans the
        # height (shape[0]).
        image_points = [
            [0, 0],
            [width, 0],
            [0, height],
            [width, height],
        ]
    if world_points is None:
        world_points = [
            [0, 0],
            [1, 0],
            [0, 1],
            [1, 1],
        ]

    image_points = np.asarray(image_points, dtype="float32")
    world_points = np.asarray(world_points, dtype="float32")

    # Compute the homography matrix
    H, _status = cv2.findHomography(image_points, world_points)
    if H is None:
        raise ValueError("cv2.findHomography could not estimate a homography")
    return H


def warp_point(point, H):
    """Map one ``(x, y)`` point through the homography ``H``.

    Args:
        point: ``(x, y)`` pair.
        H: 3x3 homography matrix.

    Returns:
        ``(x', y')`` as plain Python floats, after dividing by the
        homogeneous coordinate.
    """
    homogeneous = np.array([point[0], point[1], 1.0]).reshape((3, 1))
    warped = np.dot(H, homogeneous)
    warped = warped / warped[2]
    return (float(warped[0, 0]), float(warped[1, 0]))


def calculate_euclidean_distance(point1, point2):
    """Return the straight-line distance between two ``(x, y)`` points."""
    x1, y1 = point1
    x2, y2 = point2
    return float(((x2 - x1) ** 2 + (y2 - y1) ** 2) ** 0.5)


# Example points (centers of bounding boxes)
points_to_transform = [
    (340, 260),
    (360, 260),
    # Add more points as needed
]


def print_pairwise_distances(points, H):
    """Warp ``points`` through ``H`` and print the distance between each pair.

    Typical use: ``print_pairwise_distances(points_to_transform, homography(frame))``.

    Args:
        points: sequence of ``(x, y)`` pixel coordinates.
        H: 3x3 homography matrix, e.g. from ``homography``.

    Returns:
        List of ``(i, j, distance)`` tuples for every pair with ``i < j``.
    """
    transformed_points = [warp_point(pt, H) for pt in points]

    results = []
    for i in range(len(transformed_points)):
        for j in range(i + 1, len(transformed_points)):
            dist = calculate_euclidean_distance(transformed_points[i],
                                                transformed_points[j])
            print(f"Distance between point {i} and point {j}: {dist:.2f} units")
            results.append((i, j, dist))
    return results


In [18]:
# detect images
def detect_image(image, yolo):
    """Run the detector on one frame and print where each detection sits.

    The frame is preprocessed with ``process_image``, passed to
    ``yolo.predict`` together with the original frame shape, and the time
    spent in ``predict`` is printed. When boxes come back, their centres (as
    computed by ``distance``) are printed as well.

    Args:
        image: frame to analyse; must expose ``.shape``.
        yolo: object with a ``predict(batch, shape)`` method returning three
            values, the first being the boxes (or ``None``).

    Returns:
        The ``image`` object that was passed in.
    """
    batch = process_image(image)

    t0 = time.time()
    boxes, _, scores = yolo.predict(batch, image.shape)
    elapsed = time.time() - t0

    # '{0:.2f}' renders the first format() argument with two decimals.
    print('time: {0:.2f}s'.format(elapsed))

    if boxes is None:
        return image

    # Drawing is currently switched off; re-enable it with
    # draw(image, boxes, scores) once the overlay is wanted again.
    print(distance(boxes))

    return image

In [6]:
# detect videos
def detect_video(video, yolo):
    """Run ``detect_image`` on every frame of a video, show it and save it.

    The input is read from ``videos/test/<video>`` and the processed frames
    are written to ``videos/res/<video>`` at 20 fps with the input's frame
    size. Each frame is shown in a window named "detection"; pressing ESC
    stops early.

    Args:
        video: file name of the clip inside ``videos/test``.
        yolo: detector forwarded to ``detect_image``.

    Raises:
        IOError: if the input video cannot be opened.
    """
    video_path = os.path.join("videos", "test", video)
    camera = cv2.VideoCapture(video_path)
    vout = None

    try:
        # VideoCapture does not raise on a bad path; without this check the
        # loop would exit immediately and leave an empty output file behind.
        if not camera.isOpened():
            raise IOError("Could not open video: {0}".format(video_path))

        cv2.namedWindow("detection", cv2.WINDOW_AUTOSIZE)

        # Prepare for saving the detected video
        sz = (int(camera.get(cv2.CAP_PROP_FRAME_WIDTH)),
              int(camera.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        fourcc = cv2.VideoWriter_fourcc(*'mpeg')

        vout = cv2.VideoWriter()
        vout.open(os.path.join("videos", "res", video), fourcc, 20, sz, True)

        while True:
            res, frame = camera.read()

            if not res:
                break

            image = detect_image(frame, yolo)
            cv2.imshow("detection", image)

            # Save the video frame by frame
            vout.write(image)

            # Wait up to 110 ms for a key press; 27 is the ESC key code.
            if (cv2.waitKey(110) & 0xff) == 27:
                break
    finally:
        # Release everything even when a frame raises, so the output file is
        # finalised and the capture/window do not linger in the kernel.
        if vout is not None:
            vout.release()
        camera.release()
        cv2.destroyAllWindows()
    

In [7]:
# Instantiate the YOLO detector (class imported from model.yolo_model) with
# obj_threshold=0.5 and nms_threshold=0.5.
# NOTE(review): presumably the object-confidence cutoff and the non-max
# suppression cutoff -- confirm against model/yolo_model.py.
yolo = YOLO(obj_threshold=0.5, nms_threshold=0.5)



In [33]:
# testing
for i in range(9):
    path = 'data/dataset/' + str(i) + '.png'
    print(path)
    image = cv2.imread(path)
    if image is None:
        # cv2.imread signals a missing or unreadable file by returning None
        # instead of raising, which would otherwise surface as an opaque
        # AttributeError on `.shape` below.
        raise IOError('could not read image: ' + path)
    print(image.shape)
    # image = detect_image(image, yolo)
    # cv2.imwrite('output/detected_'+ str(i) +'.png', image)
    

data/dataset/0.png
(548, 543, 3)
data/dataset/1.png
(569, 640, 3)
data/dataset/2.png
(567, 573, 3)
data/dataset/3.png
(535, 535, 3)
data/dataset/4.png
(360, 487, 3)
data/dataset/5.png
(369, 457, 3)
data/dataset/6.png
(292, 382, 3)
data/dataset/7.png
(617, 687, 3)
data/dataset/8.png
(573, 630, 3)
