In [30]:
# Colab-only: mount Google Drive so the project folder is reachable under /content/gdrive.
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [31]:

# Work from the project folder on Drive: the weights directory (built from os.getcwd()),
# the CSV file and the video paths used in later cells are all relative to this directory.
%cd /content/gdrive/MyDrive/PAKDD/ShotDetection

/content/gdrive/MyDrive/PAKDD/ShotDetection


In [32]:

import numpy as np
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
import random
import shutil
import scipy.interpolate
import math
import tensorflow as tf
from tensorflow import keras


# **Define hyperparameters**



In [33]:
CONF_THRESH = 0.95  # a frame keeps its predicted class only if the max class probability exceeds this (see domain_probs2labels)
FPS = 30  # frame rate used by the post-processing to derive the minimum gap between hits: int(fps/2) frames
OFFSET = 13 # original note: "we use a window of 14 for the RNN". NOTE(review): not referenced in the visible cells, and MAX_SEQ_LENGTH is 12 -- confirm or remove
IOU_THRESH = 0.05  # not referenced in the visible cells
FRAME_RANGE = 4  # number of frames around a hit used by optimise_hits_naive / optimize
WINDOW_LEN =  6  # not referenced in the visible cells
VARIABLE_WINLEN = False  # not referenced in the visible cells
MAX_SEQ_LENGTH = 12  # frames per input window; first dimension of the model input
NUM_RNN_LAYERS = 2  # number of stacked recurrent layers passed to get_sequence_model
NUM_RNN_UNITS = 32  # units per recurrent layer
N_CLASSES = 3  # size of the softmax output; the post-processing treats class 0 as "no hit"
RNN_TYPE = 'GRU'  # 'GRU' or 'LSTM'; any other value builds the model without a recurrent layer
EMBEDDING_LAYER = True  # when True a Dense(32, relu) layer is applied to the input before the RNN
offset = 10  # added to the detected hit indices before they are printed in the inference cell
EPOCHS = 150  # also used as the checkpoint name: weights are loaded from <cwd>/weights/<EPOCHS>
possible_feats = ['playerpos', 'pose', 'shuttle', 'shuttlepose', 'courtshuttlepose'] # original note: shuttlepose performed best
feat_idx = 4  # index into possible_feats; 4 selects 'courtshuttlepose' (NOTE(review): the note above favours index 3 -- confirm)
WIN_OVERLAP = 0.75  # fractional overlap of consecutive windows; default stride is int((1-WIN_OVERLAP)*MAX_SEQ_LENGTH)

# **The sequence model**

In [34]:

def get_sequence_model(num_rnn_layers=2, num_rnn_units=32, rnn_type='GRU', embedding_layer=True):
  """Build and compile the recurrent classifier applied to windows of per-frame features.

  The input shape is (MAX_SEQ_LENGTH, NUM_FEATURES) and the output is a softmax
  over N_CLASSES; all three are read from module globals when the function is
  called, so they must be defined before the first call.

  Args:
    num_rnn_layers: number of stacked recurrent layers. All but the last return
      full sequences; the last returns only its final output.
    num_rnn_units: units in every recurrent layer.
    rnn_type: 'GRU' or 'LSTM'. Any other value prints 'No RNN used' and builds
      the model without a recurrent layer (the dropout and softmax layers are
      then applied directly to the sequence tensor).
    embedding_layer: when True, a Dense(32, relu) layer is applied to the input
      before the recurrent stack.

  Returns:
    A compiled keras.Model (sparse categorical cross-entropy, legacy Adam,
    accuracy metric).
  """
  features_input = keras.Input((MAX_SEQ_LENGTH, NUM_FEATURES))
  if embedding_layer:
    x = keras.layers.Dense(32, activation="relu")(features_input)
  else:
    x = features_input

  # Both recurrent types are stacked the same way. The intermediate LSTM layers
  # previously also passed return_state=True, which makes the layer return
  # [sequence, h, c]; that list was then handed to the next layer instead of a
  # single sequence tensor, unlike the GRU branch.
  recurrent_layers = {'GRU': keras.layers.GRU, 'LSTM': keras.layers.LSTM}
  rnn_layer = recurrent_layers.get(rnn_type)
  if rnn_layer is None:
    print('No RNN used')
  else:
    for _ in range(num_rnn_layers - 1):
      x = rnn_layer(num_rnn_units, return_sequences=True)(x)
    x = rnn_layer(num_rnn_units)(x)

  x = keras.layers.Dropout(0.4)(x)
  output = keras.layers.Dense(N_CLASSES, activation="softmax")(x)

  rnn_model = keras.Model(features_input, output)
  rnn_model.compile(loss="sparse_categorical_crossentropy",
        optimizer=tf.keras.optimizers.legacy.Adam(),
        metrics=["accuracy"])

  return rnn_model


In [35]:

# Length of the per-frame feature vector for every supported feature set.
dict_feats_dim = {
    possible_feats[0]: 8,   # playerpos
    possible_feats[1]: 68,  # pose
    possible_feats[2]: 2,   # shuttle
    possible_feats[3]: 70,  # shuttlepose
    possible_feats[4]: 78,  # courtshuttlepose
}

# Feature set chosen by feat_idx, and the matching model input width.
INPUT_FEATS = possible_feats[feat_idx]
NUM_FEATURES = dict_feats_dim[INPUT_FEATS]

# **Data**
Preprocess the dataset into a format suitable for feeding into the RNN.

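Column indices of the per-frame feature vector (for the `courtshuttlepose` feature set):
*   0-7: court corners (crt)
*   8-9: shuttle
*   10-43: pose1 (near player)
*   44-77: pose2 (far player)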

# **Functions for data processing**

In [36]:

# batching functions
def get_crt_transf_matrix(crt_pts): # transform pixels to coordinates in court model
    """Return the perspective transform mapping four court points onto the court model.

    The destination rectangle is (0, 0), (crt_w-1, 0), (crt_w-1, crt_h-1),
    (0, crt_h-1), so crt_pts must list its four points in the matching order.

    NOTE(review): crt_w and crt_h are read from module scope and are not defined
    in the visible cells -- they must exist before this function is called.

    Args:
        crt_pts: array of four (x, y) points; it is cast to float32.

    Returns:
        The matrix produced by cv2.getPerspectiveTransform.
    """
    src_corners = crt_pts.astype('float32')
    max_x, max_y = crt_w-1, crt_h-1
    dst_corners = np.array([[0, 0], [max_x, 0], [max_x, max_y], [0, max_y]]).astype('float32')
    return cv2.getPerspectiveTransform(src_corners, dst_corners)

def transform_coordinates(coordinates, M):
    """Apply the projective transform M to a single 2-D point.

    Args:
        coordinates: indexable whose first two items are the x and y of the point.
        M: matrix applied to the homogeneous point [x, y, 1] via M.dot.

    Returns:
        Tuple (x, y) of the transformed point after dividing by the third
        homogeneous component, each converted with int() (truncation towards zero).
    """
    homogeneous = np.array([coordinates[0], coordinates[1], 1])
    projected = M.dot(homogeneous)
    normalised = projected / projected[2]
    x_out = int(normalised[0])
    y_out = int(normalised[1])
    return (x_out, y_out)

def get_feet_cols():
    """Return the x/y column names of pose keypoints 15 and 16 for both players.

    Order: 'near' player first, then 'far'; within a player keypoint 15 then 16;
    within a keypoint x then y (8 names in total).
    """
    return [
        f"{player}_pose{joint}_{axis}"
        for player in ('near', 'far')
        for joint in (15, 16)
        for axis in ('x', 'y')
    ]

def get_pose_cols():
    """Return the x/y column names of all 17 pose keypoints for both players.

    Order: 'near' player first, then 'far'; keypoints 0..16; x before y
    (68 names in total).
    """
    return [
        f"{player}_pose{joint}_{axis}"
        for player in ('near', 'far')
        for joint in range(17)
        for axis in ('x', 'y')
    ]

def get_shuttle_cols():
    """Return the two column names holding the shuttle position (x, then y)."""
    return ['ball_x', 'ball_y']

def get_court_cols():
    """Return the eight court-corner column names.

    Corners come in the order tl, tr, br, bl, each as its x then y column.
    """
    return [
        f"{corner}_{axis}"
        for corner in ("tl", "tr", "br", "bl")
        for axis in ("x", "y")
    ]


def window(a, winlen = MAX_SEQ_LENGTH, stepsize = int((1-WIN_OVERLAP)*MAX_SEQ_LENGTH)):
    """Cut array `a` into sliding windows along its first axis.

    Args:
        a: array indexed by row along axis 0.
        winlen: number of consecutive rows per window.
        stepsize: distance, in rows, between the starts of consecutive windows.

    Returns:
        Array whose leading axes are (window, position in window): window k
        holds rows k*stepsize .. k*stepsize + winlen - 1 of `a`. Only windows
        that fit entirely inside `a` are produced.
    """
    # Start row of every window that fits, thinned to every `stepsize`-th start.
    starts = np.arange(a.shape[0] + 1 - winlen)[::stepsize]
    # Broadcast (n_windows, 1) + (1, winlen) into a table of row indices.
    row_index = starts[:, None] + np.arange(winlen)[None, :]
    return a[row_index]

def load_X(X_path, winlen = MAX_SEQ_LENGTH, stepsize = int((1-WIN_OVERLAP)*MAX_SEQ_LENGTH)):
    """Load a per-frame feature CSV and cut it into sliding windows.

    The columns that are kept depend on the module-level INPUT_FEATS:
      * 'playerpos'        -- keypoints 15/16 of both players, mapped into
                              court-model coordinates with the perspective
                              transform built from the first row's court corners
      * 'pose'             -- all pose columns
      * 'shuttle'          -- shuttle columns
      * 'shuttlepose'      -- shuttle + pose columns
      * 'courtshuttlepose' -- court + shuttle + pose columns
    Any other value keeps every column of the CSV.

    Args:
        X_path: path (or file-like object) accepted by pd.read_csv.
        winlen: rows per window.
        stepsize: rows between the starts of consecutive windows.

    Returns:
        The array returned by window(): one entry per window, each holding
        `winlen` consecutive feature rows.
    """
    df = pd.read_csv(X_path)

    if INPUT_FEATS == possible_feats[0]: # playerpos
        # Court corners are taken from the first row only.
        crt = pd.DataFrame(df, columns=get_court_cols()).head(1).to_numpy()[0].reshape((4,2))
        M = get_crt_transf_matrix(crt)

        feet_px = pd.DataFrame(df, columns=get_feet_cols()).to_numpy().reshape((len(df), 8))

        # Keep the transformed coordinates: previously they were overwritten by
        # an unconditional `array_feats = df.to_numpy()` after the branch, so
        # the court transform had no effect on the returned features.
        array_feats = np.zeros((len(df), 8))
        for r, row in enumerate(feet_px):
            for i in range(4):
                x, y = transform_coordinates((row[2*i], row[2*i+1]), M)
                array_feats[r, 2*i] = x
                array_feats[r, 2*i+1] = y
    else:
        if INPUT_FEATS == possible_feats[1]: # pose
            cols = get_pose_cols()
        elif INPUT_FEATS == possible_feats[2]: # shuttle
            cols = get_shuttle_cols()
        elif INPUT_FEATS == possible_feats[3]: # shuttle + pose
            cols = get_shuttle_cols() + get_pose_cols()
        elif INPUT_FEATS == possible_feats[4]: # shuttle + pose + court
            cols = get_court_cols() + get_shuttle_cols() + get_pose_cols()
        else:
            cols = None  # unknown feature set: use the CSV as-is

        if cols is not None:
            df = pd.DataFrame(df, columns=cols)
        array_feats = df.to_numpy()

    return window(array_feats, winlen=winlen, stepsize=stepsize)

# **Inference: load checkpoint, load input video, and visualize**

In [37]:

# Checkpoints live in ./weights (relative to the working directory) and are
# named after the number of training epochs.
weights_dir = os.path.join(os.getcwd(), "weights")
weights_path = os.path.join(weights_dir, f"{EPOCHS}")

In [38]:
def sequence_prediction(framefeats, print=False):
  """Run the global `sequence_model` and return the prediction for the first sample.

  Args:
    framefeats: batch of feature windows passed straight to sequence_model.predict.
    print: unused; kept for interface compatibility (note that it shadows the
      builtin inside this function).

  Returns:
    (predicted class index, class probabilities) for the first item of the batch.
  """
  first_probs = sequence_model.predict(framefeats)[0]
  return np.argmax(first_probs), first_probs

def csv_prediction(path):
  """Predict a class for every stride-1 window of a feature CSV.

  The CSV is windowed with load_X (window length MAX_SEQ_LENGTH, stride 1) and
  the windows are fed unscaled to the global `sequence_model`.

  Args:
    path: CSV path passed to load_X.

  Returns:
    (predictions, probs): the argmax class per window and the raw model output.
  """
  windows = load_X(path, winlen = MAX_SEQ_LENGTH, stepsize = 1)
  probs = sequence_model.predict(windows)
  return np.argmax(probs, axis=1), probs


def vis_pred_onvideo(vidpath, outvidpath, predictions):
  """Write a copy of a video in which frames with a predicted hit are tinted.

  Frame `f` of the video is paired with predictions[f - 6]: predictions come
  from windows of 12 frames, so there are fewer predictions than frames and
  each one is shown 6 frames after its window starts. Frames without a matching
  prediction are treated as class 0 and written unchanged.

  Args:
    vidpath: path of the input video.
    outvidpath: path of the output video (written with the 'mp4v' fourcc).
    predictions: per-window class labels; 1 saturates channel 0 (blue in
      OpenCV's BGR order), 2 saturates channel 2 (red).
  """
  half_window = 6  # half of the 12-frame window used to produce `predictions`

  cap = cv2.VideoCapture(vidpath)
  output = None
  try:
    width  = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Keep the frame rate as reported: truncating e.g. 29.97 to 29 would make
    # the output play at a different speed than the source.
    fps = cap.get(cv2.CAP_PROP_FPS)
    output = cv2.VideoWriter(outvidpath, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))

    framenum = 0
    while True:
      ret, frame = cap.read()
      if not ret:
        break

      # `>= 0` so that predictions[0] is used too (frame 6); the earlier
      # `framenum > 6` test skipped it.
      pred_idx = framenum - half_window
      if 0 <= pred_idx < len(predictions):
        curr_pred = predictions[pred_idx]
      else:
        curr_pred = 0

      if curr_pred == 1:
        frame[:,:,0] = 255 # frame turns blue
      elif curr_pred == 2:
        frame[:,:,2] = 255 # frame turns red

      # writing the new frame in output
      output.write(frame)
      framenum += 1
  finally:
    # Release both handles even if reading or writing fails part-way.
    if output is not None:
      output.release()
    cap.release()

In [39]:
def domain_probs2labels(probfile, conf_thresh=CONF_THRESH):
    """Turn saved class probabilities into labels, keeping only confident ones.

    Args:
        probfile: file readable by np.load containing one probability vector per row.
        conf_thresh: a row keeps its argmax class only if its largest
            probability is strictly greater than this value.

    Returns:
        List with one label per row: the argmax class for confident rows, 0 otherwise.
    """
    return [
        np.argmax(row) if np.amax(row) > conf_thresh else 0
        for row in np.load(probfile)
    ]

def gtfile2labels(labelfile):
    """Read the ground-truth CSV and return its 'player_hit' column as an array.

    Args:
        labelfile: path (or file-like object) accepted by pd.read_csv.

    Returns:
        The values of the 'player_hit' column, one entry per CSV row.
    """
    return pd.read_csv(labelfile)['player_hit'].values

def optimise_hits_naive(hit_preds, fps=FPS, frame_range=FRAME_RANGE): # takes in predicted labels for each frame
    """Thin out hit predictions and expand every kept hit to a band of frames.

    `hit_preds` is interpreted as per-frame labels when every element is 0, 1
    or 2; otherwise it is interpreted as a sequence of hit frame indices.

    Hits are scanned in order and one is kept only if it lies more than
    int(fps/2) frames after the previously kept hit. Every kept hit then marks
    the frames [hit - frame_range, hit + frame_range] with the hit's label
    (label mode) or with 1 (index mode).

    Args:
        hit_preds: per-frame labels, or hit frame indices.
        fps: frame rate; half of it is the minimum distance between kept hits.
        frame_range: half-width, in frames, of the band marked around a hit.

    Returns:
        Integer array of per-frame labels. Its length is len(hit_preds) in label
        mode and hit_preds[-1] in index mode.
        NOTE(review): in index mode the array therefore ends one frame before
        the last hit index, so that hit marks only the frames preceding it --
        confirm whether the length should be hit_preds[-1] + 1.
    """
    # check if hit predictions are in the form of labels, or frame indices
    if all(h in [0,1,2] for h in hit_preds):
        frames_hit = np.where(np.array(hit_preds) > 0)[0]
        tot_frames = len(hit_preds)
        is_predindices = False
    else:
        frames_hit = hit_preds
        tot_frames = 0 if len(hit_preds) == 0 else hit_preds[-1]
        is_predindices = True

    # ensure no two hits are within 0.5s of each other
    min_frames_apart = int(fps/2)
    out_pred = np.zeros(tot_frames)
    if len(frames_hit) > 0:
        frames_filtered = [frames_hit[0]]
        frame_ref = frames_hit[0]
        for frame in frames_hit[1:]:
            if frame > frame_ref + min_frames_apart:
                frames_filtered.append(frame)
                frame_ref = frame

        for frame_hit in frames_filtered:
            # Clamp the band at frame 0: a negative start would be read as an
            # index from the end of the array, giving an empty slice and
            # silently dropping hits closer than `frame_range` to the start.
            start = max(frame_hit - frame_range, 0)
            out_pred[start:frame_hit+frame_range+1] = 1 if is_predindices else hit_preds[frame_hit]

    return out_pred.astype('int')

def optimize(hit_preds, fps=FPS, frame_range=FRAME_RANGE):
    """Reduce per-frame hit predictions to isolated single-frame hits.

    `hit_preds` is treated as per-frame labels when every element is 0, 1 or 2.
    The first non-zero frame is always kept. A later non-zero frame is kept only
    if it lies more than int(fps/2) frames after the previously kept one AND
    more than one of the `frame_range` predictions starting at it are non-zero.

    NOTE(review): when `hit_preds` is not made of 0/1/2 labels it is used as a
    list of frame indices, but the code below still indexes `hit_preds` by
    frame number; that path looks unsupported -- confirm before relying on it.

    Args:
        hit_preds: per-frame class labels.
        fps: frame rate; half of it is the minimum distance between kept hits.
        frame_range: look-ahead length used to confirm a candidate hit.

    Returns:
        Float array (built with np.zeros) that is zero everywhere except at the
        kept frames, which carry the label predicted at that frame.
    """
    labels_given = all(h in [0,1,2] for h in hit_preds)
    if labels_given:
        frames_hit = np.where(np.array(hit_preds) > 0)[0]
        tot_frames = len(hit_preds)
    else:
        frames_hit = hit_preds
        tot_frames = hit_preds[-1] if len(hit_preds) != 0 else 0

    # ensure no two hits are within 0.5s of each other
    min_gap = int(fps/2)
    out_pred = np.zeros(tot_frames)
    if len(frames_hit) == 0:
        return out_pred

    # (frame, label) pairs that survive the filtering; the first hit is kept as-is.
    first_frame = frames_hit[0]
    kept = [(first_frame, hit_preds[first_frame])]
    last_kept = first_frame

    for frame in frames_hit[1:]:
        if not frame > last_kept + min_gap:
            continue
        lookahead = np.array(hit_preds[frame:frame+frame_range])
        if np.count_nonzero(lookahead) > 1:
            kept.append((frame, hit_preds[frame]))
            last_kept = frame

    for frame, hitter in kept:
        out_pred[frame] = hitter

    return out_pred

In [40]:
# Rebuild the architecture from the hyperparameters, then restore the trained
# weights from the checkpoint.
sequence_model = get_sequence_model(
    num_rnn_layers=NUM_RNN_LAYERS,
    num_rnn_units=NUM_RNN_UNITS,
    rnn_type=RNN_TYPE,
    embedding_layer=EMBEDDING_LAYER,
)
print(weights_path)

sequence_model.load_weights(weights_path)


/content/gdrive/MyDrive/PAKDD/ShotDetection/weights/150


<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x7f71dcd76a10>

In [41]:
# Input features and the file in which the raw class probabilities are stored.
csv_file = '1_00_01_x.csv'
outpath = '1_00_01_probs.npy'
print(outpath)

# Per-window predictions; the probabilities are saved and read back from disk.
preds, probs = csv_prediction(csv_file)
np.save(outpath, np.array(probs))

# Confidence thresholding, then reduction to isolated hits.
pred_labels = domain_probs2labels(outpath, conf_thresh=CONF_THRESH)
pred_labels1 = optimize(pred_labels)

# Indices of the remaining hits, shifted by `offset`.
pred_labels2 = [hit_idx + offset for hit_idx in np.where(np.array(pred_labels1) > 0)[0].tolist()]
print(pred_labels2)

1_00_01_probs.npy
[49, 117, 151, 169, 198, 228, 264, 293, 322, 360, 388, 406]


In [42]:
# Overlay the raw per-window predictions on the source video.
vid_testpath = '1_00_01.mp4'
outvidpath = '1_00_01_output.mp4'
vis_pred_onvideo(vidpath=vid_testpath, outvidpath=outvidpath, predictions=preds)