In [1]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from sklearn.metrics import classification_report, precision_recall_fscore_support, accuracy_score

In [2]:
from tensorflow.keras.models import load_model

# Load the trained Keras model from its .h5 file. getPredictions reads it through
# the module-level name `model`, so this cell has to run before any prediction.
model = load_model('Models/ClassificationLocationWFWDT1_25_45_90.h5') 



In [3]:
def getPredictions(videoNr, xsplit, ysplit):
    """ Runs the model on the stored inputs of one video and returns the most probable class per sample
    Args:
      videoNr: Number of the video to get the predictions for
      xsplit: Number of discrete grid splits in x direction (only used to build the annotation file name)
      ysplit: Number of discrete grid splits in y direction (only used to build the annotation file name)

    Returns:
      Index of the highest-scoring model output along the last axis, one entry per input sample
    """

    def loadScaled(path):
        # Read a .npy array and divide it by its own maximum value
        data = np.load(path)
        return data/data.max()

    videoTag = str(videoNr)

    # Original image frames
    frames = loadScaled('ImageFrames/ImagesVideo' + videoTag + '_10.npy')

    # Dilated and thresholded image frames
    framesDT = loadScaled('ImageFrames/ImagesDilatedTreshVideo' + videoTag + '_20.npy')

    # Annotations of the last ten locations
    lastLocations = loadScaled('NumpyData/AnnotationLastTenLocs' + videoTag + '_' + str(ysplit) + '_' + str(xsplit) + '_10.npy')

    # The dilated/thresholded frames get an extra trailing axis before being fed to the model
    # NOTE(review): presumably this is the channel axis of single-channel images - confirm
    probabilities = model.predict([frames, framesDT[..., np.newaxis], lastLocations])

    # Keep only the class with the highest score for every sample
    return np.argmax(probabilities, axis=-1)

In [4]:
def calculateMAE(predictions, annotations, xsplit, ysplit, xmax=1920, ymax=1088):
    """ Calculates the mean error between the position predicted by the model and the true position of the ball

    The error of a single frame is the euclidean distance between the centre of the
    predicted grid section and the annotated position. The returned value is the mean
    of these distances over all frames.

    Args:
      predictions: Predicted grid section for every image frame. 0 means "no ball";
        sections 1, 2, ... are counted row by row with xsplit sections per row
      annotations: True (x, y) location for every image frame. A frame whose annotation
        contains -1 is treated as "no ball"
      xsplit: Number of discrete grid splits in x direction
      ysplit: Number of discrete grid splits in y direction
      xmax: Extent of the image frame in x direction (default 1920, the value that used to be hard-coded)
      ymax: Extent of the image frame in y direction (default 1088, the value that used to be hard-coded)

    Returns:
      Mean euclidean distance between predicted and true position

    Raises:
      ValueError: If predictions is empty, because the mean is undefined in that case
    """

    frameCount = len(predictions)
    if frameCount == 0:
        raise ValueError('calculateMAE needs at least one prediction')

    # Size of one grid section (not rounded; rounding happens per section below)
    sectionWidth = xmax/xsplit
    sectionHeight = ymax/ysplit

    sumError = 0
    for i, prediction in enumerate(predictions):

        # Calculate predicted position of the ball (x,y) as middle point of the grid section that was predicted, if a ball was predicted
        if prediction != 0:
            row, column = divmod(prediction-1, xsplit)
            # Section borders are truncated to whole pixels before the centre is taken
            xstart = int(sectionWidth*column)
            xend = int(xstart + sectionWidth)
            ystart = int(sectionHeight*row)
            yend = int(ystart + sectionHeight)
            predictedOutput = np.array([xstart+(xend-xstart)/2, ystart+(yend-ystart)/2])

        # Set predicted position to (0,0), if no ball was predicted in an image frame
        else:
            predictedOutput = np.array([0, 0])

        # Set true position to (0,0), if no ball is in the image frame, else use the annotated position
        if -1 in annotations[i]:
            trueOutput = np.array([0, 0])
        else:
            trueOutput = annotations[i]

        # Calculate euclidean distance between predicted and true position and add it to the total error
        sumError = sumError + np.linalg.norm(predictedOutput-trueOutput)

    return sumError/frameCount

In [5]:
def evaluateVideos(videoNrs, xsplit, ysplit, printSingleVideos):
    """ Evaluates MAE, accuracy, precision, recall and F1-score for given videos

    Every video is evaluated on its own first (MAE via calculateMAE, accuracy via
    accuracy_score). Afterwards the predictions and classification annotations of
    all videos are merged to calculate the weighted precision, recall and F1-score
    over all frames. MAE and accuracy are reported as the unweighted mean of the
    per-video values.

    Args:
      videoNrs: Numbers of the videos to evaluate (any order, does not have to start at 1)
      xsplit: Number of discrete grid splits in x direction
      ysplit: Number of discrete grid splits in y direction
      printSingleVideos: Flag that defines if stats for all single videos should be printed

    Returns:
      Total mean absolute error (mean of the per-video MAEs)
      Total accuracy (mean of the per-video accuracies)
      Total precision
      Total recall
      Total F1-score

    Raises:
      ValueError: If videoNrs is empty
    """

    if len(videoNrs) == 0:
        raise ValueError('videoNrs must contain at least one video number')

    # Get predictions and annotations for all videos and calculate the individual stats for each video.
    # Results are stored by position, so they never have to be looked up through the video number.
    allPredictions = []
    allAnnotationsClassification = []
    allMae = []
    allAccuracy = []
    for videoNr in videoNrs:
        predictions = getPredictions(videoNr, xsplit, ysplit)
        allPredictions.append(predictions)

        annotationsRegression = np.load('NumpyData/AnnotationRegressionVideo' + str(videoNr) + '.npy')
        mae = calculateMAE(predictions, annotationsRegression, xsplit, ysplit)
        allMae.append(mae)

        annotationsClassification = np.load('NumpyData/AnnotationClassifyLocVideo' + str(videoNr) + '_' + str(ysplit) +'_' + str(xsplit) +'_10.npy')
        allAnnotationsClassification.append(annotationsClassification)
        accuracy = accuracy_score(annotationsClassification, predictions)
        allAccuracy.append(accuracy)

        if printSingleVideos:
            precision, recall, f1, _ = precision_recall_fscore_support(annotationsClassification, predictions, average='weighted')
            print('videoNr', videoNr)
            print('mae', mae)
            print('accuracy', accuracy)
            print('precision', precision)
            print('recall', recall)
            print('f1-score', f1)

    # Merge the predictions and annotations of all videos into flat label vectors.
    # Concatenating instead of filling a fixed-size buffer removes the limit on the total number of frames.
    mergedAnnotations = np.concatenate([np.ravel(annotations) for annotations in allAnnotationsClassification])
    mergedPredictions = np.concatenate([np.ravel(predictions) for predictions in allPredictions])
    precision, recall, f1, _ = precision_recall_fscore_support(mergedAnnotations, mergedPredictions, average='weighted')

    # MAE and accuracy are averaged over videos, not over frames
    totalMae = sum(allMae)/len(videoNrs)
    totalAccuracy = sum(allAccuracy)/len(videoNrs)

    print('Total')
    print('mae', totalMae)
    print('accuracy', totalAccuracy)
    print('precision', precision)
    print('recall', recall)
    print('f1-score', f1)

    return totalMae, totalAccuracy, precision, recall, f1

In [None]:
import warnings

# Suppress all warnings that the evaluation below would otherwise print
warnings.filterwarnings("ignore")

# Evaluation settings: videos 1 to 6, grid with 45 splits in x and 25 splits in y direction
videoNrs = list(range(1, 7))
xsplit, ysplit = 45, 25
printSingleVideos = True

# Get mae, accuracy, precision, recall and F1-score for given videos
mae, accuracy, precision, recall, f1 = evaluateVideos(
    videoNrs=videoNrs,
    xsplit=xsplit,
    ysplit=ysplit,
    printSingleVideos=printSingleVideos,
)

videoNr 1
mae 9.039059148209892
accuracy 0.9636424282855237
precision 0.9617936411003697
recall 0.9636424282855237
f1-score 0.9584716636954569
videoNr 2
mae 16.07053806936804
accuracy 0.9586390927284857
precision 0.9594844910157054
recall 0.9586390927284857
f1-score 0.9552992689325885
videoNr 3
mae 25.60367377560723
accuracy 0.9333111037012337
precision 0.9346738619027772
recall 0.9333111037012337
f1-score 0.9290610583090249
videoNr 4
mae 16.862869877311407
accuracy 0.940646882294098
precision 0.9409737090307442
recall 0.940646882294098
f1-score 0.9357235197752932
videoNr 5
mae 12.364915664041938
accuracy 0.9806602200733577
precision 0.9754172708965648
recall 0.9806602200733577
f1-score 0.976688675969312
