In [1]:
import ipydeps
# Install the third-party packages this notebook relies on into the
# running kernel's environment (no-op for packages already present).
ipydeps.pip([
    'imutils',
    'jupyterthemes',
    'numpy',
    'opencv-python',
    'pandas',
])

import cv2
import numpy as np
import os
import pandas as pd
import time

In [3]:
# Read in recorded video file, or open the default camera when 'cam' is entered
frameWidth = 320
frameHeight = 320
cwd = os.getcwd()

filename = input('Select video input (i.e. cam, CPUs.MOV, Fullfield.mp4): ').strip()
if filename.lower() == 'cam':
    # The prompt offers 'cam', so map it to capture device 0 instead of
    # looking for a file literally named "cam" in the working directory.
    cap = cv2.VideoCapture(0)
    if cap.isOpened():
        print('Capture set to: ', filename)
    else:
        print('Camera could not be opened')
else:
    # Inputs are always resolved relative to the working directory; leading
    # separators are stripped so os.path.join does not discard cwd (and the
    # resulting path no longer contains a doubled '//').
    filename = os.path.join(cwd, filename.lstrip('/\\'))
    cap = cv2.VideoCapture(filename)
    if os.path.isfile(filename) and os.access(filename, os.R_OK):
        print('Capture set to: ', filename)
    else:
        print('File does not exist or not readable')

Select video input (i.e. cam, CPUs.MOV, Fullfield.mp4): /Sample Videos/CPUs.MOV
Capture set to:  /Users/sallgaier96/Desktop/Career/Personal/Projects/Monocle/Computer Vision//Sample Videos/CPUs.MOV


In [4]:
# Object Detection: currently using YOLOv3
# Better accuracy? Sometimes people/ball are not detected: 14-19 people detected when expecting 23
# Faster speed? Current processing rate is 3-5 fps without great accuracy

# YOLO v3 or YOLO v4 or YOLO v5 or YOLO PP
# Look for sports ball and person type objects (outputs[5] and outputs[38])
# outputs[0] is cx: x coordinate of detected object
# outputs[1] is cy: y coordinate of detected object
# outputs[2] is w: width of detected object
# outputs[3] is h: height of detected object
# outputs[4] is confidence: likelihood that object is present
# rest are likelihood of defined objects in coco file found in classNames array

whT = 320 # network input is square (same height and width), change for video size
confThreshold = 0.5 # higher = more confidence required
nmsThreshold = 0.3 # lower = less boxes, higher = more boxes

# Load the label list (one name per line) and keep only the two objects that
# we're looking for: sports ball and person.
# Boolean filtering keeps the original row labels, so a surviving row is still
# addressed by its line position in coco.names, e.g. classNames.loc[label, 0]
# (presumably the same number as the YOLO class id -- confirm against coco.names).
classNames = pd.read_csv('coco.names', header = None)
classNames = classNames[classNames[0].isin(['person', 'sports ball'])]

# Slower frame rate, higher accuracy
modelConfiguration = 'yolov3-320.cfg'
modelWeights = 'yolov3.weights'
# Faster frame rate, lower accuracy
# modelConfiguration = 'yolov3-tiny.cfg'
# modelWeights = 'yolov3-tiny.weights'

# Build the Darknet network and run it with the OpenCV backend on the CPU
net = cv2.dnn.readNetFromDarknet(modelConfiguration, modelWeights)
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

def findObjects(outputs, img):
    """Draw a red rectangle on ``img`` for every detection kept by NMS.

    Parameters
    ----------
    outputs : iterable of 2-D arrays
        Network output layers. Every row is read as
        ``[cx, cy, w, h, objectness, class scores...]``; the four box values
        are multiplied by the image width/height below, i.e. they are treated
        as fractions of the image size.
    img : numpy.ndarray
        Image whose first two dimensions are (height, width). It is modified
        in place.

    Notes
    -----
    Reads the module-level ``confThreshold`` and ``nmsThreshold``.
    Returns nothing.
    """
    hT, wT = img.shape[:2] # height and width; also works for images without a channel axis
    bbox = [] # [x, y, w, h] in pixels, x/y being the top-left corner
    classIds = [] # kept alongside bbox/confs for optional label drawing
    confs = []

    for output in outputs:
        for det in output:
            scores = det[5:] # class scores follow the five box/objectness values
            classId = np.argmax(scores)
            confidence = scores[classId]
            if confidence > confThreshold:
                w, h = int(det[2]*wT), int(det[3]*hT)
                # convert the box centre to its top-left corner
                x, y = int((det[0]*wT)-w/2), int((det[1]*hT)-h/2)
                bbox.append([x, y, w, h])
                classIds.append(classId)
                confs.append(float(confidence))

    indices = cv2.dnn.NMSBoxes(bbox, confs, confThreshold, nmsThreshold) # indices to keep
    # Depending on the OpenCV version the result is an (N, 1) array, a flat
    # array, or an empty tuple; flattening handles all three.
    for i in np.array(indices).reshape(-1):
        x, y, w, h = bbox[int(i)]
        # Draw on the image that was passed in rather than on a global frame
        cv2.rectangle(img, (x, y), (x+w, y+h), (0, 0, 255), 2) # BGR color value

# def yoloDetection():
# The network's layer layout does not change between frames, so resolve the
# output layer names once instead of on every iteration.
# getUnconnectedOutLayersNames() avoids indexing getUnconnectedOutLayers(),
# whose return shape differs between OpenCV versions.
layerNames = net.getLayerNames()
outputNames = net.getUnconnectedOutLayersNames()

try:
    while True:
        success, frame = cap.read()
        if not success: # end of the video, or the source could not be read
            break

        # Scale pixels to [0, 1], resize to the network input size, swap R and B
        blob = cv2.dnn.blobFromImage(frame, 1/255, (whT, whT), [0,0,0], 1, crop = False)
        net.setInput(blob)

        outputs = net.forward(outputNames)
        findObjects(outputs, frame)

        cv2.imshow('YOLO Object Detection', frame)
        if cv2.waitKey(1) & 0xFF == 27: # Esc key to exit
            break
finally:
    # Always free the capture device/file and close the preview window, even
    # if a frame fails to process. The waitKey calls around destroyAllWindows
    # give the GUI event loop a chance to actually close the window.
    cap.release()
    cv2.waitKey(1)
    cv2.destroyAllWindows()
    cv2.waitKey(1)

-1

In [None]:
# How to rectify image to be square like bird's eye view
# Here is a link of what I'm looking for and three potential options
# https://towardsdatascience.com/how-to-track-football-players-using-yolo-sort-and-opencv-6c58f71120b8

# Option 1: cv2.getPerspectiveTransform(src, dst) -> 3x3 matrix from exactly four point pairs
# https://docs.opencv.org/2.4/modules/imgproc/doc/geometric_transformations.html#getperspectivetransform
# src = Coordinates of quadrangle vertices in source image
# dst = Coordinates of corresponding quadrangle vertices in destination image

# Option 2: cv2.findHomography(srcPoints, dstPoints, method, ransacReprojThreshold) -> (H, mask)
# https://docs.opencv.org/2.4/modules/calib3d/doc/camera_calibration_and_3d_reconstruction.html#Mat%20findHomography(InputArray%20srcPoints,%20InputArray%20dstPoints,%20int%20method,%20double%20ransacReprojThreshold,%20OutputArray%20mask)
# method can be 0 (regular method), cv2.RANSAC (robust), cv2.LMEDS (robust)

# Applying the matrix: cv2.warpPerspective(src, M, dsize[, dst[, flags[, borderMode[, borderValue]]]])
# https://docs.opencv.org/2.4/modules/imgproc/doc/geometric_transformations.html#void%20warpPerspective(InputArray%20src,%20OutputArray%20dst,%20InputArray%20M,%20Size%20dsize,%20int%20flags,%20int%20borderMode,%20const%20Scalar&%20borderValue)
# M = 3x3 transformation matrix
# dsize = size of output image
# flags = ???
# borderMode = pixel extrapolation method (BORDER_CONSTANT / BORDER_REPLICATE)
# borderValue = value used in case of constant border, default = 0

def birdsEyeView(img, src, dst, dsize, method = 0, ransacReprojThreshold = 3.0):
    """Warp ``img`` so that the points ``src`` land on the points ``dst``.

    Parameters
    ----------
    img : numpy.ndarray
        Source image.
    src, dst : sequence of (x, y)
        Corresponding points in the source image and in the rectified view.
        Both must hold the same number of points, at least four.
    dsize : tuple
        Size of the output image, passed straight to ``cv2.warpPerspective``.
    method : int
        Forwarded to ``cv2.findHomography`` (0, ``cv2.RANSAC``, ``cv2.LMEDS``).
    ransacReprojThreshold : float
        Forwarded to ``cv2.findHomography``.

    Returns
    -------
    The result of ``cv2.warpPerspective(img, M, dsize)``.

    Raises
    ------
    ValueError
        If the point lists do not match, hold fewer than four points, or no
        homography could be estimated.
    """
    src = np.asarray(src, dtype = np.float32).reshape(-1, 2)
    dst = np.asarray(dst, dtype = np.float32).reshape(-1, 2)
    if src.shape != dst.shape or len(src) < 4:
        raise ValueError('src and dst must contain the same number (>= 4) of (x, y) points')

    if len(src) == 4 and method == 0:
        # Exactly four correspondences: use the direct four-point transform
        M = cv2.getPerspectiveTransform(src, dst)
    else:
        # More points, or a robust method was requested: estimate a homography
        M, _ = cv2.findHomography(src, dst, method, ransacReprojThreshold)
        if M is None:
            raise ValueError('Homography could not be estimated from the given points')

    return cv2.warpPerspective(img, M, dsize)