# **Player detection and tracking**

***1. Vorbereiten der Daten***

---


Zuerst verbindet man sein Laufwerk mit Google Drive und legt die entsprechenden Ordner an.

In [None]:
from google.colab import drive

print("mounting DRIVE...")
drive.mount('/content/gdrive')
root_folder = 'PlayerYolo_darknet_Kopie' #@param {type:"string"}
!ln -s /content/gdrive/My\ Drive/$root_folder /my_drive

Jetzt lädt man sich ein Fußballvideo herunter, z. B. bei YouTube.
Dieses wird im Ordner "dataset_preparation" gespeichert.
In einem Unterordner werden nun Images gespeichert, die anschließend gelabelt werden müssen.

In [None]:
# Extract still images from the video so they can be labelled afterwards:
# one frame is saved, then the read position jumps ahead by frame_step frames.
import cv2
import os

video_path = "/my_drive/dataset_preparation/ChelseaSalzburg.avi"
image_dir = '/my_drive/dataset_preparation/images'
frame_step = 30  # i.e. at 30 fps, this advances one second

# Create the output folder first; without it every write below would fail.
try:
    os.makedirs(image_dir, exist_ok=True)
except OSError:
    print ('Error: Creating directory of data')
    raise

# Read the video from the specified path.
cam = cv2.VideoCapture(video_path)
if not cam.isOpened():
    # A wrong path would otherwise end the loop silently with zero images.
    print('Error: Could not open video ' + video_path)

# Index of the frame that is read next; also used in the image file name.
currentframe = 0

try:
    while True:
        ret, frame = cam.read()
        if not ret:
            # No frame returned: end of video (or the video could not be read).
            break

        name = os.path.join(image_dir, 'images' + str(currentframe) + '.jpg')
        print ('Creating...' + name)

        # imwrite reports failure through its return value, not an exception.
        if not cv2.imwrite(name, frame):
            print('Error: Could not write ' + name)

        # Skip ahead and reposition the capture on the next frame to extract.
        currentframe += frame_step
        cam.set(cv2.CAP_PROP_POS_FRAMES, currentframe)
finally:
    # Release the capture even if the loop is interrupted.
    cam.release()
    cv2.destroyAllWindows()

Nun beginnt das Labeln der einzelnen Images. An dieser Stelle muss nun auch entschieden werden, welche Klassen man verwenden will. Ich habe folgende gewählt:

1. Player
2. Referee
3. Linesmen
4. Ball
5. Goalkeeper

Das Labeln selbst habe ich lokal mit LabelImg (https://github.com/tzutalin/labelImg) unter Windows und Anaconda ausgeführt.
Die gelabelten Images werden anschließend im Ordner "imagesLab" gespeichert.
Jetzt alle Images und die dazugehörigen txt-Dateien zippen. Die Zip-Datei muss "obj.zip" heißen und anschließend in den Ordner "dataset" verschoben werden.

Um zu sehen, ob es genug Labels sind, kann man folgende Zelle ausführen.


In [None]:
import glob
import os

# First whitespace-separated token of every annotation line found in the
# label folder, kept as strings.
TextList = []

# NOTE: this changes the working directory for all following cells as well.
os.chdir("/my_drive/dataset_preparation/imagesLab")
for label_file in glob.glob("*.txt"):
    with open(label_file, 'r') as f:
        for line in f:
            fields = line.split(None, 1)
            # Blank lines have no first token; skip them instead of raising
            # IndexError.
            if fields:
                TextList.append(fields[0])

def count_instance():
    """Print how many entries of ``TextList`` belong to each of the five classes.

    Entries are compared as strings against the ids "0".."4", reported in the
    order Player, Referee, Linesmen, Ball, Goalkeeper; any other entry is
    ignored.
    """
    # Position in `tallies` for each recognised class id string.
    slot_for_id = {str(position): position for position in range(5)}
    tallies = [0] * 5

    for class_id in TextList:
        slot = slot_for_id.get(class_id)
        if slot is not None:
            tallies[slot] += 1

    report = ("/my_drive/dataset_preparation/imagesLab\n\n Player: {} \n Referee: {} \n Linesmen: {} "
              "\n Ball: {} \n Goalkeeper: {}")
    print(report.format(*tallies))


# Print the per-class counts for the entries collected in TextList.
count_instance()



Jetzt wird das Repository von Darknet geklont, mit dessen Hilfe nachher auf unseren Daten trainiert wird. Damit das Training schneller durchläuft, sollte man die GPU verwenden.

In [None]:
# Fetch the darknet sources and make the checkout the working directory for
# this and the following cells.
!git clone https://github.com/AlexeyAB/darknet
%cd darknet

# Build switches (Colab form fields). Each enabled switch flips the matching
# "<NAME>=0" entry in darknet's Makefile to "<NAME>=1" before compiling.
OPENCV = True #@param {type:"boolean"}
GPU = True #@param {type:"boolean"}
CUDNN = True #@param {type:"boolean"}
CUDNN_HALF = True #@param {type:"boolean"}
LIBSO = False #@param {type:"boolean"}

print("setting properties...")
if OPENCV:
  print("activating OPENCV...")
  !sed -i 's/OPENCV=0/OPENCV=1/' Makefile

if GPU:
  # Show the installed CUDA compiler version before enabling the GPU build.
  print("engines CUDA...")
  !/usr/local/cuda/bin/nvcc --version
  
  print("activating GPU...")
  !sed -i 's/GPU=0/GPU=1/' Makefile

if CUDNN:
  print("activating CUDNN...")
  !sed -i 's/CUDNN=0/CUDNN=1/' Makefile

if CUDNN_HALF:
  print("activating CUDNN_HALF...")
  !sed -i 's/CUDNN_HALF=0/CUDNN_HALF=1/' Makefile

if LIBSO: #under processing
  print("activating LIBSO...")
  !sed -i 's/LIBSO=0/LIBSO=1/' Makefile

# Compile darknet with the Makefile as patched above.
print("making...")
!make

print("FINISH!")

In [None]:
# Folder below /my_drive that holds the zipped dataset (Colab form field).
dataset_folder = 'dataset' #@param {type:"string"}

# Copy the archive from Drive into the parent of the current directory.
print("loading dataset...")
!cp /my_drive/$dataset_folder/obj.zip ../

# Unpack the archive into data/obj below the current directory.
print("unziping dataset...")
!unzip ../obj.zip -d data/obj

Ggf. müssen nun im Ordner "configuration_files" die folgenden Dateien angepasst werden.
- obj.names:  Hier müssen die gleichen Klassen in der gleichen Reihenfolge wie in classes.txt im Ordner dataset_preparation stehen.
- obj.data:  Hier stehen Informationen über die Klassen und das Training. Ggf. müssen auch die Pfade angepasst werden.
- yolo-obj.cfg: Die Datei enthält wichtige Informationen zum Netzwerk, die Größe der images, Filter, Klassen usw. Für die Anpassung der Datei siehe auch [Darknet](https://github.com/AlexeyAB/darknet#how-to-train-to-detect-your-custom-objects), [NET CFG Parameters](https://github.com/AlexeyAB/darknet/wiki/CFG-Parameters-in-the-%5Bnet%5D-section), [Layers CFG Parameters](https://github.com/AlexeyAB/darknet/wiki/CFG-Parameters-in-the-different-layers).

Danach werden die Daten in das Projekt geladen




In [None]:
# Folder below /my_drive that holds the three configuration files (Colab form field).
configuration_folder = 'configuration_files' #@param {type:"string"}

# Network definition -> ./cfg
print("loading yolo-obj.cfg...")
!cp /my_drive/$configuration_folder/yolo-obj.cfg ./cfg

# Class names -> ./data
print("loading obj.names...")
!cp /my_drive/$configuration_folder/obj.names ./data

# Dataset description -> ./data
print("loading obj.data...")
!cp /my_drive/$configuration_folder/obj.data ./data

Darknet benötigt noch eine .txt-Datei, die die Pfade der einzelnen Images enthält.

In [None]:
# Folder below /my_drive that holds helper scripts (Colab form field).
script_folder = 'py_scripts' #@param {type:"string"}

# Script to copy from Drive and execute (Colab form field).
script_file = 'generate_train.py' #@param {type:"string"}

# Copy the script into the current directory, then run it from there.
print("loading script...")
!cp /my_drive/$script_folder/$script_file ./
print("performing script...")
!python $script_file

Nun wird eine Yolo-Datei mit vortrainierten Gewichten geladen. Hiermit kann das Training beschleunigt werden, denn dank des Transfer-Lernens können die vortrainierten ersten Schichten übernommen werden.

In [None]:
# Folder below /my_drive that holds weight files (Colab form field); the
# training cell also reads yolo-obj_last.weights from this folder when resuming.
weights_folder = 'backup' #@param {type:"string"}

# File name of the pre-trained weights used when training starts from scratch.
pre_trained_weights_file = 'yolov4.conv.137' #@param {type:"string"}

# Copy the pre-trained weights from Drive into the current directory.
print("loading pre_trained weights...")
!cp /my_drive/$weights_folder/$pre_trained_weights_file ./

loading pre_trained weights...


***2. Das Training***

---
Es gibt nun die Möglichkeit, die mAP-Berechnung (mean Average Precision, mittlere durchschnittliche Genauigkeit) alle 100 Iterationen anzeigen zu lassen.

Ebenfalls legt man nun fest, ob das Training neu gestartet oder fortgesetzt wird. Sollte das Skript einmal unterbrochen werden, z. B. weil die Laufzeit getrennt wird, kann man so das Training fortsetzen.


In [None]:
train_using_mAP = True #@param {type:"boolean"}

option = 'RESUME TRAINING' #@param ["START TRAINING FROM BEGINNING", "RESUME TRAINING"]

if option == 'START TRAINING FROM BEGINNING':
  if train_using_mAP:
    !./darknet detector train data/obj.data cfg/yolo-obj.cfg $pre_trained_weights_file -dont_show -map
  else:
    !./darknet detector train data/obj.data cfg/yolo-obj.cfg $pre_trained_weights_file -dont_show
else:
  if train_using_mAP:
    !./darknet detector train data/obj.data cfg/yolo-obj.cfg /my_drive/$weights_folder/yolo-obj_last.weights -dont_show -map
  else:
    !./darknet detector train data/obj.data cfg/yolo-obj.cfg /my_drive/$weights_folder/yolo-obj_last.weights -dont_show


Hat man mit mAP trainiert, können nun Graphen gespeichert werden, indem man erst den gewünschten Bereich der Iterationen festlegt.

In [None]:
initial_iteration_number = 100 #@param {type:"slider", min:100, max:10000, step:100}
final_iteration_number = 3000 #@param {type:"slider", min:100, max:10000, step:100}

chart_name = "mAP-chart_iter:{}-{}.png".format(initial_iteration_number, final_iteration_number)

print("saving chart...")

!cp chart.png //my_drive/charts/$chart_name

Hier gibt man den Namen der Gewichte ein, auf deren Grundlage die Metriken berechnet werden sollen.

In [None]:
# File name of the weights to evaluate (Colab form field).
# NOTE(review): the source folder is hard-coded as /my_drive/backup here rather
# than taken from weights_folder - confirm this is intended.
weights_name = 'yolo-obj_1000.weights' #@param {type:"string"}

# Copy the selected weights from Drive into the current directory.
!cp /my_drive/backup/$weights_name ./

# Run darknet's "detector map" command with the copied weights.
!./darknet detector map data/obj.data cfg/yolo-obj.cfg $weights_name

***3. Testen***

---

In diesem Abschnitt testen wir nun die Objekterkennung in einem Video (aber nicht das Video aus dem wir unsere Label für das Training gewonnen haben) und speichern die Ergebnisse.

Sollte das Video nicht den Erwartungen entsprechen, muss noch Feintuning vorgenommen werden. Man kann z.B. ein anderes Gewicht in der oberen Kommandozeile auswählen und nochmals versuchen.

In [None]:
# Colab form fields: where the test video lives, which weights to use and
# where/how the annotated result is stored on Drive.
video_test_folder = 'test_videos' #@param {type:"string"}
input_name = 'ChelseaSalzburgclip.mp4' #@param {type:"string"}
weights_type = 'yolo best' #@param ["yolo best", "yolo last"]
predictions_folder = 'predictions' #@param {type:"string"}
output_name = 'ChelseaSalzburgclip.mp4' #@param {type:"string"}
prediction_version =  1 #@param {type:"integer"}

# Result file name: output_name (including its own extension) followed by the
# version suffix and ".avi".
prediction_name = "{}_prediction_version:{}.avi".format(output_name, prediction_version)

# Anything other than "yolo last" selects the "best" weights file.
if weights_type == "yolo last":
  yolo_weights = "yolo-obj_last.weights"
else:
  yolo_weights = "yolo-obj_best.weights"

# Run darknet's "detector demo" on the test video; the output file name
# prediction.avi is passed via -out_filename.
print("detecting...")
!./darknet detector demo data/obj.data cfg/yolo-obj.cfg /my_drive/backup/$yolo_weights -dont_show /my_drive/$video_test_folder/$input_name -i 0 -out_filename prediction.avi

# Copy the result to Drive under the versioned name.
print("copying prediction in Drive...")
!cp prediction.avi /my_drive/$predictions_folder/$prediction_name


Nachdem das Testen nun erfolgreich war, kann man das nachstehende Skript verwenden. Hier werden nun noch die Mannschaften anhand der Trikotfarbe unterschieden. Die Trikotfarben müssen ggf. angepasst werden. Hier werden Weiß und Rot verwendet.

In [None]:
# Install pytesseract. NOTE(review): its import in the tracking cell below is
# currently commented out, so this install may be unnecessary - confirm.
!pip install pytesseract

In [None]:
!pip install tesseract-ocr

In [None]:
import cv2 as cv
from scipy.spatial import distance
import numpy as np
from collections import OrderedDict
from google.colab.patches import cv2_imshow
#import pytesseract
from numpy import argmax
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import load_model



# Resource locations and thresholds for the detector; adjust the paths to the
# layout of your Drive folder.
yolomodel = dict(
    config_path="/my_drive/configuration_files/yolo-obj.cfg",
    model_weights_path="/my_drive/configuration_files/yolo-obj_best.weights",
    dataset_names="/my_drive/configuration_files/obj.names",
    confidence_threshold=0.5,  # minimum class score for a detection to be kept
    threshold=0.3,             # second threshold handed to cv.dnn.NMSBoxes
)

# Video that is analysed by the tracking loop.
video_src = "/my_drive/test_video/video.mp4"

# Auxiliary resources: a template image loaded as grayscale, a colour image
# and a second video writer. They are not referenced by the active pipeline.
temp = cv.imread(r'/my_drive/dataset_preparation/temp.jpg', cv.IMREAD_GRAYSCALE)
ground = cv.imread('/my_drive/dataset_preparation/dst.jpg')
out2 = cv.VideoWriter('plane.avi', cv.VideoWriter_fourcc('M', 'J', 'P', 'G'), 20, (900, 600))

class Tracker:
    """Centroid tracker that keeps stable integer IDs for detections across frames.

    Every tracked object is represented by the centre of the bounding box it
    was last matched to. On each ``update`` the new centres are matched
    greedily to the nearest existing objects. Objects without a match age and
    are dropped once they were missed more than ``maxLost`` times in a row;
    detections without a match are registered under a fresh ID.
    """

    def __init__(self, maxLost = 30):
        """Create an empty tracker.

        Args:
            maxLost: number of consecutive misses an object survives; it is
                removed when its miss counter exceeds this value.
        """
        self.nextObjectID = 0         # ID handed to the next registered object
        self.objects = OrderedDict()  # objectID -> last known centre (x, y)
        self.lost = OrderedDict()     # objectID -> consecutive updates without a match
        self.maxLost = maxLost

    def addObject(self, new_object_location):
        """Register ``new_object_location`` under the next free ID."""
        self.objects[self.nextObjectID] = new_object_location
        self.lost[self.nextObjectID] = 0
        self.nextObjectID += 1

    def removeObject(self, objectID):
        """Forget the object ``objectID`` and its miss counter."""
        del self.objects[objectID]
        del self.lost[objectID]

    def _markLost(self, objectID):
        """Count one miss for ``objectID`` and drop it after too many misses."""
        self.lost[objectID] += 1
        if self.lost[objectID] > self.maxLost:
            self.removeObject(objectID)

    @staticmethod
    def getLocation(bounding_box):
        """Return the integer centre of an ``(x_left, y_top, x_right, y_bottom)`` box."""
        xlt, ylt, xrb, yrb = bounding_box
        return (int((xlt + xrb) / 2.0), int((ylt + yrb) / 2.0))

    def update(self,  detections):
        """Match the detections of one frame against the tracked objects.

        Args:
            detections: sequence of ``(x_left, y_top, x_right, y_bottom)`` boxes;
                may be empty.

        Returns:
            The tracker's ``objects`` mapping (objectID -> centre) after the
            update. It is the live internal mapping, not a copy.
        """
        if len(detections) == 0:
            # Nothing seen in this frame: every tracked object misses once.
            # Iterate over a copy of the keys because objects may be removed.
            for objectID in list(self.lost.keys()):
                self._markLost(objectID)
            return self.objects

        new_object_locations = np.zeros((len(detections), 2), dtype="int")
        for (i, detection) in enumerate(detections):
            new_object_locations[i] = self.getLocation(detection)

        if len(self.objects) == 0:
            # Nothing tracked yet: every detection starts a new object.
            for location in new_object_locations:
                self.addObject(location)
            return self.objects

        objectIDs = list(self.objects.keys())
        previous_object_locations = np.array(list(self.objects.values()))

        # D[r, c] = distance between tracked object r and new centre c.
        D = distance.cdist(previous_object_locations, new_object_locations)

        # Visit objects starting with the one closest to any detection; each
        # object proposes its nearest detection.
        row_idx = D.min(axis=1).argsort()
        cols_idx = D.argmin(axis=1)[row_idx]
        assignedRows, assignedCols = set(), set()

        for (row, col) in zip(row_idx, cols_idx):
            # Each object and each detection takes part in one match at most.
            if row in assignedRows or col in assignedCols:
                continue

            objectID = objectIDs[row]
            self.objects[objectID] = new_object_locations[col]
            self.lost[objectID] = 0
            assignedRows.add(row)
            assignedCols.add(col)

        # Both leftovers are handled on every update. Several objects can
        # propose the same detection, so unmatched objects AND unmatched
        # detections can occur together regardless of which side is larger;
        # treating only one side left new detections unregistered or stale
        # objects un-aged.
        for row in sorted(set(range(0, D.shape[0])).difference(assignedRows)):
            self._markLost(objectIDs[row])

        for col in sorted(set(range(0, D.shape[1])).difference(assignedCols)):
            self.addObject(new_object_locations[col])

        return self.objects

# Load the trained YOLO network through OpenCV's DNN module.
net = cv.dnn.readNetFromDarknet(yolomodel["config_path"], yolomodel["model_weights_path"])

# Class names, one per line. The file is closed right after reading, and
# splitlines()/strip() also cope with Windows line endings, which would
# otherwise leave a trailing "\r" on each name and break the comparison with
# "Player" in the main loop.
with open(yolomodel["dataset_names"]) as names_file:
    labels = [name.strip() for name in names_file.read().strip().splitlines()]

# Fixed seed so every run draws the same box colours.
np.random.seed(12345)

# Names of the network's output layers. getUnconnectedOutLayers() returns
# 1-based indices, as a flat or as an Nx1 array depending on the OpenCV
# version; flattening handles both.
layer_names = net.getLayerNames()
layer_names = [layer_names[i - 1] for i in np.array(net.getUnconnectedOutLayers()).flatten()]

# One random colour per class.
bbox_colors = np.random.randint(0, 255, size=(len(labels), 3))

# Number of consecutive misses after which the tracker drops an object.
maxLost = 5
tracker = Tracker(maxLost = maxLost)
cap = cv.VideoCapture(video_src)

# Frame size and output writer are filled in by the main loop.
(H, W) = (None, None)
writer = None

def count_nonblack_np(img):
    """Return the number of positions in ``img`` with a non-zero value along the last axis.

    For an image array of shape (height, width, channels) this is the number
    of pixels that are not completely black.
    """
    lit_positions = img.any(axis=-1)
    return lit_positions.sum()

def color_detection(image, show = False): #<-- True for debugging
    """Decide which jersey colour dominates an image crop.

    The crop is tested against fixed colour ranges in the order red, white.
    The first range that covers more than 1 % of the crop's non-black pixels
    determines the result.

    Args:
        image: colour crop as a numpy array, with channels in the order the
            ranges below are written in (presumably BGR, as frames read by
            OpenCV - confirm when changing the ranges).
        show: when True, display the crop next to the masked result for every
            tested range (debugging aid).

    Returns:
        'red', 'white', or 'not_sure' when no range matches or the crop cannot
        be analysed.
    """
    # (result label, inclusive lower bound, inclusive upper bound)
    boundaries = [('red', [17, 15, 75], [50, 56, 200]),
                  ('white', [187, 169, 112], [255, 255, 255])]

    for (team_color, lower, upper) in boundaries:
        lower = np.array(lower, dtype = "uint8")
        upper = np.array(upper, dtype = "uint8")

        try:
            mask = cv.inRange(image, lower, upper)
            output = cv.bitwise_and(image, image, mask = mask)
            tot_pix = count_nonblack_np(image)
            color_pix = count_nonblack_np(output)
        except Exception:
            # Best effort: e.g. an empty crop makes OpenCV raise. Unlike a bare
            # "except", this does not swallow KeyboardInterrupt, so the cell
            # can still be stopped while a video is being processed.
            print("strange things..")
            return 'not_sure'

        if tot_pix == 0:
            # Completely black crop: there is no meaningful ratio to compute.
            return 'not_sure'

        ratio = color_pix/tot_pix
        print("ratio is:", ratio)

        if show == True:
            # Shown before the decision so matching crops can be inspected
            # too; cv2_imshow is the display helper this notebook already uses.
            cv2_imshow(np.hstack([image, output]))

        if ratio > 0.01:
            return team_color

    return 'not_sure'

# Main loop: detect objects in every frame of the video, colour-code players
# by jersey colour, track all detections, then show and store the annotated
# frame.
# NOTE: the disabled prototype code that lived in string literals inside this
# loop (OCR of player numbers, top-view projection) was removed; those
# statements never executed and referenced names that are not defined.
try:
    while True:

        success, image = cap.read()

        if not success:
            # cap.read() fails on read errors and also when the video ends.
            print("error!")
            break

        if W is None or H is None:
            (H, W) = image.shape[:2]

        blob = cv.dnn.blobFromImage(image, 1 / 255.0, (416, 416), swapRB=True, crop=False)
        net.setInput(blob)
        detections_layer = net.forward(layer_names)
        detections_bbox = []
        boxes, confidences, classIDs = [], [], []

        for out in detections_layer:
            for detection in out:
                # Entries 0-3 hold the box (centre x, centre y, width, height,
                # relative to the frame size); entries from 5 on are the
                # per-class scores.
                scores = detection[5:]
                classID = np.argmax(scores)
                confidence = scores[classID]

                if confidence > yolomodel['confidence_threshold']:
                    box = detection[0:4] * np.array([W, H, W, H])
                    (centerX, centerY, width, height) = box.astype("int")
                    x = int(centerX - (width / 2))
                    y = int(centerY - (height / 2))
                    boxes.append([x, y, int(width), int(height)])
                    confidences.append(float(confidence))
                    classIDs.append(classID)

        # Non-maximum suppression removes overlapping boxes.
        idxs = cv.dnn.NMSBoxes(boxes, confidences, yolomodel["confidence_threshold"], yolomodel["threshold"])

        if len(idxs) > 0:
            # The index container's shape differs between OpenCV versions;
            # flattening an array copy handles all of them.
            for i in np.array(idxs).flatten():
                (x, y) = (boxes[i][0], boxes[i][1])
                (w, h) = (boxes[i][2], boxes[i][3])
                detections_bbox.append((x, y, x+w, y+h))

                clr = [int(c) for c in bbox_colors[classIDs[i]]]

                if labels[classIDs[i]] == "Player":
                    # Boxes may reach beyond the frame; negative slice bounds
                    # would count from the opposite edge, so clamp them to 0.
                    crop = image[max(y, 0):max(y+h, 0), max(x, 0):max(x+w, 0)]
                    color = color_detection(crop)
                    if color == 'red':
                        cv.rectangle(image, (x, y), (x+w, y+h), (0, 0, 0), 2)
                    elif color != 'not_sure':
                        cv.rectangle(image, (x, y), (x+w, y+h), (0, 0, 255), 2)
                    # Players whose colour is 'not_sure' get no box, only the label.
                else:
                    cv.rectangle(image, (x, y), (x+w, y+h), clr, 2)

                cv.putText(image, "{}: {:.4f}".format(labels[classIDs[i]], confidences[i]), (x, y-5), cv.FONT_HERSHEY_SIMPLEX, 0.5, clr, 2)

        objects = tracker.update(detections_bbox)

        for (objectID, centroid) in objects.items():
            # Plain ints keep the OpenCV drawing calls independent of numpy
            # scalar types.
            cx, cy = int(centroid[0]), int(centroid[1])
            text = "ID {}".format(objectID)
            cv.putText(image, text, (cx - 10, cy - 10), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
            cv.circle(image, (cx, cy), 4, (0, 255, 0), -1)

        # Create the writer once the frame size is known, then store every
        # annotated frame. Previously the writer was created but no frame was
        # ever written, leaving an empty result file.
        if writer is None:
            fourcc = cv.VideoWriter_fourcc(*"MJPG")
            writer = cv.VideoWriter("/my_drive/test_video/result7.mp4", fourcc, 30, (W, H), True)
        writer.write(image)

        cv2_imshow(image)

        if cv.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release resources even when the loop stops with an error or an
    # interrupt. The writer is still None if no frame could be read.
    if writer is not None:
        writer.release()
    cap.release()
    # destroyWindow() needs a window name; destroyAllWindows() matches the
    # cleanup used in the frame-extraction cell.
    cv.destroyAllWindows()