In [1]:
import cv2 
import numpy as np
import time
import math
from scipy.spatial import distance
from sklearn.cluster import KMeans

#Loading and initialising yolov3 from opencv

In [2]:
args = {
          "confThreshold": 0.9,
          "nmsThreshold":0.4,
          "inpWidth":416,
          "inpHeight":416,
          "bboxAreaToImageArea":0.15,
          "team0":'MIL',
          "team1":'CAVS',
          "colorBoundaries":[
                        ([ 56, -7 ,186], [196, 133, 266]), #white/team0/HSV/ FOR BASIC
                        ([160-70,170-80, 60-30], [160+70,170+80, 60+30]) #red/team1/HSV
                        ],
          "team0HSV":[255, 255, 255], #white/team0/HSV/For KMEANS
          "team1HSV":[255,0,0] #red/team1/HSV
        }


In [3]:
[(160-70+196)/2 ,(-7+133)/2 , (186+266)/2]  

[143.0, 63.0, 226.0]

In [4]:
with open("/Users/sandeep/Desktop/dataandmodles/models/teamDetection/coco.names", 'rt') as f:
    classes = f.read().rstrip('\n').split('\n')

In [5]:
# Get the names of the output layers of the CNN network
# net : an OpenCV DNN module network object
def getOutputsNames(net):
    # Get the names of all the layers in the network
    layersNames = net.getLayerNames()
    # Get the names of the output layers, i.e. the layers with unconnected outputs
    return [layersNames[i[0] - 1] for i in net.getUnconnectedOutLayers()]

In [6]:
rootDir = '/Users/sandeep/Desktop/dataandmodles/models/teamDetection'
net = cv2.dnn.readNet(rootDir+"/yolov3.weights",rootDir+"/yolov3.cfg")
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_DEFAULT)
# change to cv2.dnn.DNN_TARGET_CPU (slower) if this causes issues (should fail gracefully if OpenCL not available)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_OPENCL)
output_layer_names = getOutputsNames(net)

#Opencv setup

In [7]:
# dummy on trackbar callback function
def on_trackbar(val):
    return

In [8]:
windowName = 'YOLOv3 Team detection'
cv2.namedWindow(windowName , cv2.WINDOW_NORMAL)
trackbarName = 'reporting confidence > (x 0.01)'
cv2.createTrackbar(trackbarName,windowName,70,100, on_trackbar)


#HelperFunction:Drawing Prediction

In [9]:
def drawPred(image,team,class_name, confidence, left, top, right, bottom, colour):
    # Draw a bounding box.
    cv2.rectangle(image, (left, top), (right, bottom), colour, 3)

    # construct label
    label = '%s:%.2f' % (class_name, confidence)
    label = label+f'| team:{team}'

    #Display the label at the top of the bounding box
    labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
    top = max(top, labelSize[1])
    cv2.rectangle(image, (left, top - round(1.5*labelSize[1])),
        (left + round(1.5*labelSize[0]), top + baseLine), (255, 255, 255), cv2.FILLED)
    cv2.putText(image, label, (left, top), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0,0,0), 1)

#HelperFunction:Post Procces

In [10]:
def postprocess(image, results, threshold_confidence, threshold_nms):
    frameHeight = image.shape[0]
    frameWidth = image.shape[1]

    # Scan through all the bounding boxes output from the network and..
    # 1. keep only the ones with high confidence scores.
    # 2. assign the box class label as the class with the highest score.
    # 3. construct a list of bounding boxes, class labels and confidence scores

    classIds = []
    confidences = []
    boxes = []
    for result in results:
        for detection in result:
            scores = detection[5:]
            classId = np.argmax(scores)
            confidence = scores[classId]
            if confidence > threshold_confidence:
                center_x = int(detection[0] * frameWidth)
                center_y = int(detection[1] * frameHeight)
                width = int(detection[2] * frameWidth)
                height = int(detection[3] * frameHeight)
                left = int(center_x - width / 2)
                top = int(center_y - height / 2)
                classIds.append(classId)
                confidences.append(float(confidence))
                boxes.append([left, top, width, height])

    # Perform non maximum suppression to eliminate redundant overlapping boxes with
    # lower confidences
    classIds_nms = []
    confidences_nms = []
    boxes_nms = []

    indices = cv2.dnn.NMSBoxes(boxes, confidences, threshold_confidence, threshold_nms)
    for i in indices:
        i = i[0]
        classIds_nms.append(classIds[i])
        confidences_nms.append(confidences[i])
        boxes_nms.append(boxes[i])

    # return post processed lists of classIds, confidences and bounding boxes
    return (classIds_nms, confidences_nms, boxes_nms)

#Filtering the preditction

In [11]:
def check_bbox_size(bboxW,bboxH,imgW,imgH):
    bboxToImg = (bboxW*bboxH) / (imgW * imgH)
    return bboxToImg <= args['bboxAreaToImageArea']

In [12]:
def check_label(label):
    return label == 'person'

#ROI color detection

In [13]:
def getRoi(frame, left,top,right,bottom):
    '''
    Helper function for detect_teams
    Returns ROI(region of interest) 
    '''
    roi = frame[top:bottom , left:right,:]
    return roi

In [14]:
def countNonBalckPix(roiMasked):
    '''
    Helper function for findColorRatio
    Returns the number of non black pixels in the roi
    '''
    return roiMasked.any(axis = -1).sum()

In [15]:
def getColorRatio(roi,show=False):
    '''
    Helper function for detect teams
    Returns a list, that contains percentage of the pixel that have the team %colors
    Example: [0.9 , 0.1]. 90% of the pixels are of team 1
    '''
    ratioList = []
    
    for teamColorLower,teamColorUpper in args['colorBoundaries']:
        mask = cv2.inRange(roi , np.array(teamColorLower) , np.array(teamColorUpper))
        roiMasked = cv2.bitwise_and(roi,roi,mask=mask)
        totalColorPix = countNonBalckPix(roiMasked)
        totalPix = countNonBalckPix(roi)
        colorPixRatio = totalColorPix / totalPix
        ratioList.append(colorPixRatio)
        #print(f'totalColrPix:{totalColorPix} , totalPx:{totalPix}')
        if show == True:
            cv2.imshow("images", np.hstack([roi,roiMasked]))
            if cv2.waitKey(0) & 0xFF == ord('q'):
              cv2.destroyAllWindows() 

    return np.array(ratioList)    

In [16]:
# img = cv2.imread('/Users/sandeep/Desktop/dataandmodles/data/cavs.JPG')
# roi = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
# getColorRatio(roi, show=True)

In [17]:
def compareRatio(ratioList):
    '''
    Helper function for detectTeam
    Finds the team with highest color ratio.
    Returns string team names or "Uncertain" if not sure
    '''
    maxRatio = max(ratioList)
    if maxRatio < 0.1:
        return 'Uncertain'
    else:      
        if ratioList[1] > ratioList[0]:
            return 'team1'
        elif ratioList[1] <= ratioList[0]:
            return'team0'
            
    

In [18]:
def detectTeamBasic(img,left,top,right,bottom):
    '''
    Given an image(BGR) and the location of ROI
    Finds the team based on ROI color
    '''
    roi = getRoi(img,left,top,right,bottom)
    roiHSV = np.array(cv2.cvtColor(roi, cv2.COLOR_BGR2HSV))
    ratioList = getColorRatio(roiHSV)
    team = compareRatio(ratioList)
    return team

In [19]:
def getTeamInfo(team):
    if team == 'Uncertain':
        return (0,0,0) , 'Uncertain'
    elif team == 'team0': 
        return (0,213,255) , args[team]
    else:
        return (36,36,158) , args[team]

#Team using K-means

In [20]:
def findHistogram(learner):
    '''
    Helper function for detectTeamsKmeans
    Returns a histrogam object for an ROI
    '''
    numLabels = np.arange(0, len(np.unique(learner.labels_)) + 1)
    (hist, _) = np.histogram(learner.labels_, bins=numLabels)
    
    hist = hist.astype("float")
    hist /= hist.sum()
    return hist



In [21]:
def resizeForKMeans(roi):
    '''
    Helper function for detectTeamsKmenas
    Given an roi in HSV space
    Returns reshaped roi of (NumberofPixles x channels)
    '''
    return roi.reshape((roi.shape[0] * roi.shape[1],3))

In [22]:
def getClustersAndPercatage(hist , learner):
    '''
    Helper function for detectTeansKmeans
    Returns a dict with cluster object {'c1':[h,s,v,percentage] , c2:[..]}
    '''
    clusters = {}
    for index,(percent, color) in enumerate(zip(hist, learner.cluster_centers_)):
        colorList = color.astype("uint8").tolist()
        cluster = f'c{index}'
        clusters[cluster] = [colorList[0], colorList[1], colorList[2], int(percent*100)]
    return clusters
    

In [23]:
def getLargestCluster(clusters):
    '''
    Helper function for detectTemsKmeans
    Returns the name/key of the largest cluster in the clusters dict
    '''
    percentages = np.array([clusters[cluster][3]for cluster in clusters])
    max_index = np.argmax(percentages)
    return list(clusters.keys())[max_index]

In [24]:
def getEuclidianDistance(hsv1,hsv2):
    return distance.euclidean(hsv1,hsv2)

In [25]:
def getLearner(nClusters):
    '''
     Returns a KMeans Learner object
    '''
    learner = KMeans(n_clusters=nClusters) #cluster number
    return learner


In [29]:
def detectTeamKmeans(learner,img,left,top,right,bottom):
    '''
    Given an image(BGR) and the location of ROI
    Returns the team using K-Means clustering
    '''
    roi = getRoi(img,left,top,right,bottom)
    roiHSV = np.array(cv2.cvtColor(roi, cv2.COLOR_BGR2RGB))
    roiHSV = resizeForKMeans(roiHSV) #represent as row*column,channel number
    learner.fit(roiHSV)
    hist = findHistogram(learner)
    clusters = getClustersAndPercatage(hist,learner) #clusters is a dict {c1:[h,s,v,perc],c2:[]}
    hsv = clusters[getLargestCluster(clusters)][:-1] # is a list [h,s,v]
    print(hsv)
    teamIndex = np.argmin(np.array([
                                getEuclidianDistance(hsv, args['team0HSV']),
                                 getEuclidianDistance(hsv, args['team1HSV'])
                     ]))
    team = f'team{teamIndex}'
    return team

    

#Controller to switch between basic and Kmeans

In [30]:
def detectTeam(img,left,top,right,bottom, algo='basic', learner=None):
    if algo == 'basic':
        return detectTeamBasic(img,left,top,right,bottom)
    else:
        return detectTeamKmeans(learner,img,left,top,right,bottom)
        

#Object detection boiler template

In [None]:
videoPath ='/Users/sandeep/Desktop/dataandmodles/data/3-Pointer2.mov'
cap = cv2.VideoCapture(videoPath)
frameCount = 0 
rawFrame=[]
learner = getLearner(3)
while cap.isOpened():
    ret,frame = cap.read()
    frameCopy = frame[:]
    if ret:
        start_t = cv2.getTickCount()
        
        #do stuff
        # create a 4D tensor (OpenCV 'blob') from image frame (pixels scaled 0->1, image resized)
        tensor = (cv2.dnn.blobFromImage(frame , 1/255 , (args["inpWidth"], args["inpHeight"]) , [0,0,0] , 1, 
                                        crop=False))
        # set the input to the CNN network
        net.setInput(tensor)
        results = net.forward(output_layer_names)
        
        args['confThreshold'] = cv2.getTrackbarPos(trackbarName,windowName) / 100
        classIDs, confidences, boxes = (postprocess(frame, results, args["confThreshold"], 
                                                    args["nmsThreshold"]))
        for detected_object in range(0, len(boxes)):
            
            box = boxes[detected_object]
            left = box[0]
            top = box[1]
            width = box[2]
            height = box[3]
    
            bboxFit = check_bbox_size(width,height, *frame.shape[0:-1])
            labelFit = check_label(classes[classIDs[detected_object]])
            if bboxFit and labelFit and left>0:
                team= detectTeam(frameCopy, left ,top, left+width,top+height , algo='kmeans' ,
                                 learner=learner)
                
                teamColor, teamName = getTeamInfo(team)
                (drawPred(frame,teamName,classes[classIDs[detected_object]], 
                          confidences[detected_object], 
                          left, top, left + width, top + height, 
                          teamColor))

            t,_ = net.getPerfProfile()
            inference_t = (t * 1000.0 / cv2.getTickFrequency())
            label = ('Inference time: %.2f ms' % inference_t) + (' (Framerate: %.2f fps' % (1000 / inference_t)) + ')'
            cv2.putText(frame, label, (0, 15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255))
            if frameCount == 496: 
                print(f'{left},{top},{left + width},{top + height}')
                rawFrame = frame[:,:,:]
            frameCount += 1
#         End of do stuff
        
        cv2.imshow(windowName,frame)
        (cv2.setWindowProperty(windowName, cv2.WND_PROP_FULLSCREEN,
                                cv2.WINDOW_FULLSCREEN&False))    
             
        time_now = cv2.getTickCount()
        stop_t = ((time_now - start_t)/cv2.getTickFrequency())*1000
    
        #cv2.imshow("YOLO" , frame)
        
        key = cv2.waitKey(max(2, 40 - int(math.ceil(stop_t)))) & 0xFF
        if key == ord('q'):
            break  
    else:
        cap.release()
        break
cap.release()
cv2.destroyAllWindows()
    

[211, 168, 145]
[218, 170, 150]
[198, 160, 144]
[61, 29, 39]
[76, 77, 79]
[205, 179, 179]
[201, 162, 146]
[39, 25, 34]
[44, 47, 58]
[211, 168, 145]
[220, 171, 152]
[198, 161, 147]
[61, 30, 39]
[202, 175, 172]
[75, 74, 77]
[201, 162, 146]
[39, 25, 34]
[44, 47, 58]
[221, 172, 152]
[211, 169, 145]
[210, 173, 169]
[76, 66, 72]
[213, 170, 159]
[203, 175, 171]
[202, 161, 145]
[214, 173, 158]
[44, 46, 58]
[219, 171, 151]
[211, 168, 145]
[208, 172, 169]
[219, 179, 161]
[201, 161, 145]
[197, 171, 168]
[194, 172, 187]
[212, 171, 159]
[44, 46, 57]
[219, 171, 150]
[211, 168, 145]
[204, 170, 173]
[194, 173, 191]
[202, 162, 147]
[199, 172, 167]
[220, 178, 162]
[213, 171, 160]
[45, 46, 58]
[74, 62, 69]
[33, 27, 44]
[84, 93, 86]
[218, 170, 148]
[211, 168, 145]
[204, 169, 172]
[222, 183, 172]
[199, 172, 168]
[201, 161, 145]
[93, 92, 99]
[81, 65, 69]
[38, 23, 34]
[45, 45, 58]
[32, 27, 43]
[70, 78, 70]
[46, 22, 38]
[56, 36, 49]
[218, 170, 147]
[211, 168, 145]
[205, 170, 172]
[69, 40, 50]
[203, 176, 176]


[186, 154, 158]
[229, 196, 201]
[47, 28, 43]
[206, 163, 148]
[216, 173, 153]
[182, 145, 131]
[225, 178, 154]
[84, 91, 91]
[191, 162, 156]
[228, 195, 203]
[207, 164, 147]
[215, 172, 152]
[70, 36, 41]
[48, 28, 43]
[219, 172, 149]
[83, 91, 91]
[189, 163, 160]
[37, 33, 44]
[185, 153, 158]
[37, 30, 51]
[228, 195, 203]
[206, 163, 145]
[66, 39, 45]
[218, 174, 152]
[47, 28, 43]
[186, 160, 152]
[84, 92, 93]
[220, 173, 151]
[75, 62, 60]
[41, 35, 48]
[209, 166, 150]
[225, 192, 203]
[203, 165, 151]
[58, 36, 46]
[220, 173, 153]
[189, 161, 152]
[218, 174, 153]
[53, 33, 48]
[84, 92, 91]
[77, 64, 62]
[42, 34, 46]
[182, 152, 159]
[60, 49, 74]
[220, 174, 154]
[222, 192, 206]
[64, 38, 45]
[56, 32, 43]
[182, 153, 142]
[218, 173, 152]
[53, 33, 48]
[68, 66, 64]
[77, 69, 69]
[43, 34, 47]
[184, 152, 156]
[58, 47, 71]
[42, 31, 47]
[55, 32, 45]
[219, 173, 152]
[61, 38, 44]
[223, 191, 203]
[183, 153, 141]
[52, 33, 48]
[217, 172, 150]
[85, 92, 93]
[77, 64, 62]
[184, 152, 158]
[43, 31, 47]
[43, 34, 47]
[51, 43, 68

#Team Detection Using K-Means