In [None]:
from scannertools.maskrcnn_detection import visualize_labels
from esper.table_tennis.pose_utils import *

import cv2
import random
import pickle
import pycocotools.mask as mask_util
from scipy import ndimage
import numpy as np
from scipy.signal import savgol_filter

In [None]:
# Id of the single video this notebook works on, plus the one-element list form.
video_id = 65
video_ids = [video_id]

# Take the first record matching that id (raises if the lookup returns nothing).
video = Video.objects.filter(id=video_id)[0]

# Load data

In [None]:
# Load the per-player scene intervals. The context manager closes the file
# handle as soon as unpickling finishes instead of leaking it.
# NOTE(review): pickle.load can execute arbitrary code -- only load trusted files.
with open('/app/data/pkl/match_scene_intervals_dict.pkl', 'rb') as pkl_file:
    match_intervals_all = pickle.load(pkl_file)

match_intervals_A = match_intervals_all['HW_foreground']
match_intervals_B = match_intervals_all['JZ_foreground']

# Sort both lists in place so that index 0 is the smallest interval in
# ordinary element ordering (later cells read match_intervals_A[0]).
match_intervals_A.sort()
match_intervals_B.sort()

In [None]:
# Collect the foreground pose of every frame, one dict per player
# (A <- match_intervals_A, B <- match_intervals_B).
fid2openpose_A = {}
fid2openpose_B = {}
for intervals, fid2openpose in ((match_intervals_A, fid2openpose_A),
                                (match_intervals_B, fid2openpose_B)):
    for interval in intervals:
        # Only the foreground half of the returned pair is kept.
        fid2pose_fg, fid2pose_bg = group_pose_from_interval(interval)
        # Update in place: rebuilding the dict with {**old, **new} on every
        # interval re-copies everything gathered so far. Later intervals still
        # win on duplicate frame ids, exactly as before.
        fid2openpose.update(fid2pose_fg)

# Label clips for a simple control demo
Based on the player's x position, segment each clip into three types of motion: moving left, still, and moving right.

In [None]:
# Export the first interval of player A as a standalone clip.
_, sfid, efid, duration = match_intervals_A[0]

# video.download is given its segment in seconds, hence the division by fps.
clip_start_sec = sfid / video.fps
clip_end_sec = efid / video.fps
video.download(segment=(clip_start_sec, clip_end_sec),
               output_path='/app/tmp/HW_foreground_0.mp4')

In [None]:
def _merge_overlap_seg(motion_list):
    """Thin a list of candidate segments down to non-overlapping ones.

    Candidates are visited in order of ``start_fid``. Among the candidates that
    share the current start, the one reaching the largest ``end_fid`` is kept;
    every following candidate whose ``start_fid`` is not strictly greater than
    the kept segment's ``end_fid`` is discarded. The input list is not modified.

    Returns:
        A new list of the kept segment dicts, ordered by ``start_fid``.
    """
    ordered = sorted(motion_list, key=lambda seg: seg['start_fid'])
    merged = []
    idx = 0
    while idx < len(ordered):
        start_fid = ordered[idx]['start_fid']
        longest_seg = ordered[idx]
        # Among candidates with the same start, keep the furthest-reaching one.
        while idx < len(ordered) and ordered[idx]['start_fid'] == start_fid:
            if ordered[idx]['end_fid'] >= longest_seg['end_fid']:
                longest_seg = ordered[idx]
            idx += 1
        merged.append(longest_seg)
        # Skip everything that begins before the kept segment is over.
        while idx < len(ordered) and ordered[idx]['start_fid'] <= longest_seg['end_fid']:
            idx += 1
    return merged


def _moves_steadily(x_seg, fps, direction):
    """Check that x advances by at least 0.01 over every ``fps``-frame stride.

    ``direction`` is +1 for increasing x and -1 for decreasing x.
    """
    for k in range(0, len(x_seg) - fps, fps):
        if direction * (x_seg[k + fps] - x_seg[k]) < 0.01:
            return False
    return True


def group_motion(interval, fid2openpose, step_size=5, fps=25):
    """Label stretches of one interval as moving left, moving right or still.

    The neck x coordinate of every frame that has a pose is smoothed with a
    Savitzky-Golay filter (window ``fps``, polynomial order 2). Every window of
    at least ``fps`` samples, with start and end on a ``step_size`` grid, is
    then classified from the smoothed curve:

    * ``still``: x varies by less than 0.01 over the window.
    * ``right``: x rises by more than 0.05, its minimum is the first sample
      and its maximum the last, and it gains at least 0.01 per ``fps`` frames.
    * ``left``: the mirror case, with a 0.04 threshold.

    Overlapping candidates of the same type are finally thinned with
    ``_merge_overlap_seg``.

    Args:
        interval: 4-tuple whose 2nd and 3rd items are the start frame id
            (inclusive) and end frame id (exclusive).
        fid2openpose: mapping from frame id to a pose object exposing
            ``_format_keypoints()``. Frames absent from the mapping are skipped.
        step_size: stride, in samples, of candidate window starts and ends.
        fps: frame rate used for ``duration``; also the filter window length
            and the minimum candidate length. Pass the real frame rate if the
            video is not 25 fps.

    Returns:
        Dict with keys ``'left'``, ``'right'``, ``'still'``; each value is a
        list of dicts with ``start_x``, ``end_x``, ``start_fid``, ``end_fid``
        (exclusive) and ``duration`` (seconds). All lists are empty when the
        interval has no more than ``fps`` usable frames.
    """
    _, sfid, efid, _ = interval
    motion_dict = {'left': [], 'right': [], 'still': []}

    # Keep the frame ids next to the samples: when poses are missing, sample
    # index k is no longer frame sfid + k.
    fids = [fid for fid in range(sfid, efid) if fid in fid2openpose]
    if len(fids) <= fps:
        # No window of fps samples fits, and savgol_filter would reject an
        # input shorter than its window.
        return motion_dict

    x_list = [fid2openpose[fid]._format_keypoints()[Pose.Neck, 0] for fid in fids]
    x_smooth = savgol_filter(x_list, fps, 2)

    for i in range(0, len(fids), step_size):
        for j in range(i + fps, len(fids), step_size):
            x_seg = x_smooth[i:j]
            xmin, xmax = min(x_seg), max(x_seg)
            argmin = np.argmin(x_seg)
            argmax = np.argmax(x_seg)
            last = len(x_seg) - 1
            span = xmax - xmin
            timing = {'start_fid': fids[i], 'end_fid': fids[j],
                      'duration': (fids[j] - fids[i]) / fps}

            if span < 0.01:
                avgx = np.average(x_seg)
                motion_dict['still'].append({'start_x': avgx, 'end_x': avgx, **timing})
            elif span > 0.05 and argmin == 0 and argmax == last:
                if _moves_steadily(x_seg, fps, 1):
                    motion_dict['right'].append({'start_x': xmin, 'end_x': xmax, **timing})
            # NOTE(review): 0.04 here vs 0.05 for 'right' -- kept as found;
            # confirm the asymmetry is intentional.
            elif span > 0.04 and argmax == 0 and argmin == last:
                if _moves_steadily(x_seg, fps, -1):
                    motion_dict['left'].append({'start_x': xmax, 'end_x': xmin, **timing})

    return {'left': _merge_overlap_seg(motion_dict['left']),
            'right': _merge_overlap_seg(motion_dict['right']),
            'still': _merge_overlap_seg(motion_dict['still'])}

In [None]:
# Pool the labelled segments of every player-A interval, one list per motion type.
motion_dict = {'left': [], 'right': [], 'still': []}
for interval in match_intervals_A:
    motion_dict_i = group_motion(interval, fid2openpose_A)
    for motion_type in ('left', 'right', 'still'):
        motion_dict[motion_type].extend(motion_dict_i[motion_type])

In [None]:
# Raw vs. smoothed x trajectory of the foreground player.
# No earlier cell defines x_list / x_list_smooth at notebook level (x_list only
# exists as a local inside group_motion), so they are computed here.
# NOTE(review): assumes the curve of interest is the first player-A interval,
# smoothed like group_motion's defaults (window 25, order 2) -- confirm.
plot_sfid, plot_efid = match_intervals_A[0][1:3]
x_list = [fid2openpose_A[fid]._format_keypoints()[Pose.Neck, 0]
          for fid in range(plot_sfid, plot_efid) if fid in fid2openpose_A]
x_list_smooth = savgol_filter(x_list, 25, 2)

matplotlib.rc('xtick', labelsize=20)
matplotlib.rc('ytick', labelsize=20)
plt.plot(np.arange(len(x_list)), x_list, label='original')
plt.plot(np.arange(len(x_list)), x_list_smooth, label='smooth')
plt.legend(fontsize=18)
plt.xlabel('Frame', fontsize=22)
plt.ylabel('Foreground player position X', fontsize=22)
plt.show()

In [None]:
# Number of segments found per motion type, printed as: left still right.
print(*(len(motion_dict[motion_type]) for motion_type in ('left', 'still', 'right')))

In [None]:
# Start x vs. end x of every segment; marker area grows with segment duration.
plt.figure(figsize=(10, 10))

# left = red, right = blue, still = green (drawn in this order).
for motion_type, color in (('left', 'r'), ('right', 'b'), ('still', 'g')):
    for motion in motion_dict[motion_type]:
        plt.scatter(motion['start_x'], motion['end_x'], c=color, s=motion['duration']*50)

plt.xlim([0, 1])
plt.ylim([0, 1])
plt.xlabel('Foreground player start position X', fontsize=22)
plt.ylabel('Foreground player end position X', fontsize=22)
plt.show()

In [None]:
def find_motion(motion_dict, start_x, end_x, duration, motion_type, weight=(1,1,1)):
    """Return the stored segment of ``motion_type`` closest to the requested one.

    Closeness is the weighted sum of absolute differences in start x, end x and
    duration, with ``weight`` giving the three factors in that order. On ties
    the earliest candidate in the list wins. Returns ``None`` when there is no
    candidate with a finite cost below infinity (e.g. an empty list).
    """
    closest = None
    closest_cost = np.inf
    for candidate in motion_dict[motion_type]:
        gaps = (
            abs(start_x - candidate['start_x']) * weight[0],
            abs(end_x - candidate['end_x']) * weight[1],
            abs(duration - candidate['duration']) * weight[2],
        )
        cost = gaps[0] + gaps[1] + gaps[2]
        # Strict comparison keeps the first of several equally good candidates.
        if cost < closest_cost:
            closest, closest_cost = candidate, cost
    return closest

In [None]:
# Retrieve one segment per step of the demo: move right, move back left, then
# stand still. The first two searches ignore duration (weight 0).
motion_1 = find_motion(motion_dict, 0.35, 0.65, 1, 'right', (1,1,0))
motion_2 = find_motion(motion_dict, 0.65, 0.35, 1, 'left', (1,1,0))
motion_3 = find_motion(motion_dict, 0.35, 0.35, 3, 'still', (1,1,1))

print(motion_1, motion_2, motion_3)


def motion2interval(motion):
    """Convert a motion dict to a (video_id, start_fid, end_fid, duration) tuple.

    The video id comes from the notebook-level ``video_id``.
    """
    return (video_id, motion['start_fid'], motion['end_fid'], motion['duration'])


searched_intervals = [motion2interval(found) for found in (motion_1, motion_2, motion_3)]
print(searched_intervals)

In [None]:
# Export the first retrieved segment so it can be checked by eye.
_, sfid, efid, _ = searched_intervals[0]

# Frame ids are converted to seconds for video.download.
seg_start_sec = sfid / video.fps
seg_end_sec = efid / video.fps
video.download(segment=(seg_start_sec, seg_end_sec), output_path='/app/tmp/search_seg0.mp4')

In [None]:
# NOTE(review): notebook convention is to keep imports in the first cell;
# consider moving this one there.
from esper.supercut import stitch_video_temporal
# Pass the three retrieved intervals to the stitcher and write the result to
# out_path -- presumably they are joined in list order; confirm in esper.supercut.
stitch_video_temporal(searched_intervals, out_path='/app/result/naive_control.mp4')

In [None]:
# Flat line at y = 10 over x = 0..4.
# NOTE(review): looks like a leftover scratch cell -- consider deleting it.
plt.figure()
flat_x = np.arange(5)
flat_y = np.full(5, 10.0)
plt.plot(flat_x, flat_y)