In [1]:
import numpy as np
import math
import pandas as pd
import seaborn as sns
import cv2
%matplotlib inline

***
# Helper Functions

In [2]:
def outcome(y_pred, x_pred, y_true, x_true, scaled_tol):
    """Classify a single frame's prediction against its ground-truth label.

    A coordinate pair that is strictly negative means "no ball"; a pair that
    is strictly positive means "ball at this position".

    Returns a length-5 integer array ordered (TP, TN, FP1, FP2, FN) in which
    at most one entry is 1:
        TP  - ball predicted, ball present, within ``scaled_tol`` of the label
        TN  - no ball predicted, no ball present
        FP1 - ball predicted, ball present, but farther than ``scaled_tol``
        FP2 - ball predicted, no ball present
        FN  - no ball predicted, ball present

    Frames that match none of these cases (a coordinate equal to zero, or a
    pair with mixed signs) produce an all-zero array.
    """
    pred_present = x_pred > 0 and y_pred > 0
    pred_absent = x_pred < 0 and y_pred < 0
    true_present = x_true > 0 and y_true > 0
    true_absent = x_true < 0 and y_true < 0

    # positions within the returned array
    tp_idx, tn_idx, fp1_idx, fp2_idx, fn_idx = range(5)
    tally = np.zeros(5, dtype=int)

    if pred_absent and true_absent:
        tally[tn_idx] = 1
    elif pred_present and true_absent:
        tally[fp2_idx] = 1
    elif pred_absent and true_present:
        tally[fn_idx] = 1
    elif true_present and pred_present:
        # Euclidean distance between predicted and labelled positions
        dx = x_pred - x_true
        dy = y_pred - y_true
        dist = math.sqrt(dx ** 2 + dy ** 2)
        tally[fp1_idx if dist > scaled_tol else tp_idx] = 1

    return tally

In [3]:
def evaluation(TP, TN, FP1, FP2, FN):
    """Compute accuracy, precision and recall from outcome counts.

    Parameters
    ----------
    TP, TN, FP1, FP2, FN : int (Python or numpy)
        Counts of true positives, true negatives, mislocated detections,
        spurious detections and missed detections.

    Returns
    -------
    numpy.ndarray
        ``(accuracy, precision, recall)``. Any ratio whose denominator is
        zero is reported as 0.
    """
    def _ratio(numerator, denominator):
        # Explicit check instead of catching ZeroDivisionError: numpy integers
        # do not raise on division by zero, they return nan/inf with a warning.
        if denominator == 0:
            return 0.0
        return numerator / denominator

    accuracy = _ratio(TP + TN, TP + TN + FP1 + FP2 + FN)
    precision = _ratio(TP, TP + FP1 + FP2)
    recall = _ratio(TP, TP + FN)
    return np.array((accuracy, precision, recall))

In [4]:
def calc_metrics(df, scaled_tol):
    """Aggregate per-frame outcomes of ``df`` into percentage metrics.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns "Y_Predicted", "X_Predicted", "Y_True" and
        "X_True"; each row is scored with ``outcome``.
    scaled_tol : float
        Maximum distance between prediction and label for a true positive.

    Returns
    -------
    numpy.ndarray
        ``[TP, TN, FP1, FP2, FN, accuracy, precision, recall]``, each scaled
        to [0, 100] and rounded to 2 decimals. The first five entries are
        shares of the counted frames. If no frame was counted, all entries
        are 0.
    """
    counts = np.zeros(5, dtype=int)
    for y_pred, x_pred, y_true, x_true in zip(df["Y_Predicted"].array,
                                            df["X_Predicted"].array,
                                            df["Y_True"].array,
                                            df["X_True"].array):
        counts += outcome(y_pred, x_pred, y_true, x_true, scaled_tol)

    # Convert to plain Python ints: numpy integers return nan on division by
    # zero instead of raising, which would bypass evaluation()'s fallbacks.
    TP, TN, FP1, FP2, FN = (int(c) for c in counts)
    (accuracy, precision, recall) = evaluation(TP, TN, FP1, FP2, FN)

    # normalize the counts to shares of all counted frames
    n = TP + TN + FP1 + FP2 + FN
    if n:
        shares = [c / n for c in (TP, TN, FP1, FP2, FN)]
    else:
        # empty input, or no frame matched any outcome category
        shares = [0.0] * 5

    metrics = np.array(shares + [accuracy, precision, recall], dtype=float)

    # scale to [0,100]
    return np.round(100 * metrics, 2)

***
# Get performance metrics

In [28]:
# Empty results table; the evaluation loop adds one row per scored run.
perf_columns = [
    # which run the row describes
    "video", "weights_version", "start_frame", "end_frame",
    # outcome categories
    "TP", "TN", "FP1", "FP2", "FN",
    # summary scores
    "acc", "prec", "rec",
    # video geometry and the tolerance derived from it
    "vid_width", "vid_height", "scaled_tol",
]
perf_df = pd.DataFrame(columns=perf_columns)

In [29]:
# Display the results table to check its column layout.
perf_df

Unnamed: 0,video,weights_version,start_frame,end_frame,TP,TN,FP1,FP2,FN,acc,prec,rec,vid_width,vid_height,scaled_tol


In [30]:
# Sanity check: print the last "Frame" value of the old-weights predictions and
# of the labels so the two files can be compared.
# Forward slashes keep the paths portable and avoid the invalid "\I" escape.
for csv_suffix in ("old_weights_predict", "labels"):
    print(pd.read_csv(f"InPlayBalls_54min_vid/InPlayBalls_54min_vid_{csv_suffix}.csv")["Frame"].array[-1])

12809
12809


In [31]:
# Videos to score; paired element-wise with frame_ranges below.
vids = ["InPlayBalls_54min_vid.mp4"] * 3
# (start_frame, end_frame) window evaluated for the matching entry of vids.
# NOTE(review): the windows are applied as df.iloc[start:end] (end exclusive),
# so row 7500 falls in neither of the two sub-ranges - confirm this is intended.
frame_ranges = [
    (0, 12000),
    (0, 7500),
    (7501, 12000),
]
# Localisation tolerance; multiplied by the frame diagonal before use.
tol = 0.0075

In [32]:
# Score every (video, frame range) pair with both weight versions and store one
# row of metrics per combination in perf_df.
i = 0  # index of the next row written to perf_df
for vid, (start_frame, end_frame) in zip(vids, frame_ranges):
    # get video
    vid_root = vid[:-4]  # file name without its 4-character extension
    video = cv2.VideoCapture(f"{vid_root}/{vid}")
    try:
        if not video.isOpened():
            # fail loudly: zero dimensions would silently zero every label
            raise FileNotFoundError(f"could not open video {vid_root}/{vid}")
        vid_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        vid_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    finally:
        # only the metadata is needed, so free the capture handle right away
        video.release()

    # get labels (cols 2 and 3 are the x,y)
    labels = pd.read_csv(f"{vid_root}/{vid_root}_labels.csv").iloc[:,[2,3]]
    labels.columns = ["X_True", "Y_True"]
    num_labels = labels.shape[0]

    # rescale the labels to the proper dimensions
    labels.iloc[:,0] *= vid_width
    labels.iloc[:,1] *= vid_height

    # scale tolerance to the video dimensions (fraction of the frame diagonal)
    scaled_tol = tol*math.sqrt(vid_height**2 + vid_width**2)

    for version in ["old", "new"]:
        # get predictions (cols 2 and 3 are the x,y)
        preds = pd.read_csv(f"{vid_root}/{vid_root}_{version}_weights_predict.csv").iloc[:,[2,3]]
        preds.columns = ["X_Predicted", "Y_Predicted"]
        num_preds = preds.shape[0]

        # merge true and pred row by row, keeping only rows present in both
        # (slicing with preds.index[-1] would drop the last predicted frame)
        num_rows = min(num_labels, num_preds)
        df = pd.concat([labels.iloc[:num_rows], preds.iloc[:num_rows]], axis=1)

        # cap which frames performance gets evaluated on; use per-iteration
        # names so the outer loop variables are not overwritten
        if start_frame is not None:
            first_row, last_row = start_frame, end_frame
        else:
            first_row, last_row = 0, df.shape[0]
        df = df.iloc[first_row:last_row,:]

        # get metrics
        metrics = calc_metrics(df, scaled_tol)
        new_row = [vid_root, version, first_row, last_row] + list(metrics) + [vid_width, vid_height, round(scaled_tol, 2)]

        # add to df
        perf_df.loc[i,:] = new_row
        i += 1

# save df
perf_df.to_csv("three_vids_old_and_new_weights_metrics.csv", index=False)
perf_df

Unnamed: 0,video,weights_version,start_frame,end_frame,TP,TN,FP1,FP2,FN,acc,prec,rec,vid_width,vid_height,scaled_tol
0,InPlayBalls_54min_vid,old,0,12000,51.72,10.44,7.72,2.37,27.75,62.16,83.68,65.08,1920,1080,16.52
1,InPlayBalls_54min_vid,new,0,12000,62.71,10.45,6.74,2.36,17.73,73.16,87.33,77.96,1920,1080,16.52
2,InPlayBalls_54min_vid,old,0,7500,47.91,11.27,8.39,2.52,29.91,59.18,81.46,61.57,1920,1080,16.52
3,InPlayBalls_54min_vid,new,0,7500,60.06,11.32,7.4,2.47,18.75,71.38,85.89,76.21,1920,1080,16.52
4,InPlayBalls_54min_vid,old,7501,12000,58.06,9.07,6.6,2.11,24.16,67.13,86.95,70.61,1920,1080,16.52
5,InPlayBalls_54min_vid,new,7501,12000,67.13,9.0,5.65,2.18,16.05,76.13,89.56,80.71,1920,1080,16.52


In [36]:
# TP: predicts a ball, there's a ball, and it's at the right location
# TN: predicts no ball and there's no ball
# FP1: predicts a ball, there's a ball, but it's too far away
# FP2: predicts a ball but there's no ball
# FN: predicts no ball but there's a ball