In [None]:
from scannertools.maskrcnn_detection import visualize_one_image
from esper.table_tennis.pose_utils import *

import cv2
import random
import pickle
import pycocotools.mask as mask_util
from scipy import ndimage

In [None]:
# Video under analysis: its id, a one-element list holding that id, and the first
# Video object whose id matches.
video_id = 65
video_ids = [video_id]
video = Video.objects.filter(id=video_id)[0]

# Load data

In [None]:
# Load the match-scene interval dictionary. The context manager closes the pickle
# file as soon as it has been read instead of leaving that to garbage collection.
# NOTE: pickle.load can execute arbitrary code - only load trusted pickle files.
with open('/app/data/pkl/match_scene_intervals_dict.pkl', 'rb') as pkl_file:
    match_intervals_all = pickle.load(pkl_file)
match_intervals_A = match_intervals_all['HW_foreground']
match_intervals_B = match_intervals_all['JZ_foreground']
# .sort() works in place, so the objects stored in match_intervals_all are
# reordered as well.
match_intervals_A.sort()
match_intervals_B.sort()

In [None]:
# Load the pickled match_scene_cls data; the file is closed once it has been read.
with open('/app/data/pkl/match_scene_cls.pkl', 'rb') as pkl_file:
    match_scene_cls = pickle.load(pkl_file)

In [None]:
# Load the densepose and maskrcnn results; each file is closed once it has been read.
# encoding='latin1' is kept for the densepose pickle - presumably it was written by
# Python 2 (TODO confirm).
with open('/app/data/pkl/densepose_result.pkl', 'rb') as pkl_file:
    densepose_result = pickle.load(pkl_file, encoding='latin1')
with open('/app/data/pkl/maskrcnn_result.pkl', 'rb') as pkl_file:
    maskrcnn_result = pickle.load(pkl_file)

In [None]:
# Index the densepose records by their 'fid' value and the maskrcnn_result entries
# by their position in the sequence.
fid2densepose = {record['fid']: record for record in densepose_result}
fid2maskrcnn = dict(enumerate(maskrcnn_result))

In [None]:
# Collect the foreground openpose results of every match interval, per player.
# dict.update() merges in place; rebuilding the dict with {**a, **b} for every
# interval copied all previously collected entries again each time.
fid2openpose_A = {}
fid2openpose_B = {}
for intervals, fid2openpose in ((match_intervals_A, fid2openpose_A),
                                (match_intervals_B, fid2openpose_B)):
    for interval in intervals:
        # The second return value (fid2pose_bg) is not used in this cell.
        fid2pose_fg, fid2pose_bg = group_pose_from_interval(interval)
        fid2openpose.update(fid2pose_fg)

In [None]:
# Collect the first densepose keypoint entry ('keyps'[0]) of every frame inside the
# foreground intervals of each player.
fid2densepose_A = {}
fid2densepose_B = {}
for intervals, fid2keyps in ((match_intervals_A, fid2densepose_A),
                             (match_intervals_B, fid2densepose_B)):
    # sfid / efid deliberately stay plain cell-level loop variables: the
    # video.download(...) cell further down reads whatever values they end with.
    for (_, sfid, efid, _) in intervals:
        for fid in range(sfid, efid):
            # .get(): frames without any densepose record are skipped instead of
            # raising KeyError.
            densepose_entry = fid2densepose.get(fid)
            if densepose_entry is not None and 'keyps' in densepose_entry:
                fid2keyps[fid] = densepose_entry['keyps'][0]

In [None]:
# Report how many foreground openpose entries were collected for each player.
num_pose_A = len(fid2openpose_A)
num_pose_B = len(fid2openpose_B)
print("Number of foreground pose of A: {} Number of foreground pose of B: {}".format(num_pose_A, num_pose_B))

In [None]:
# Visualize the match scenes in match_intervals_B (the 'JZ_foreground' intervals)
# as a supercut video.
from esper.supercut import *
stitch_video_temporal(
    match_intervals_B,
    out_path='/app/result/JZ_foreground.mp4',
    im_size=(1920, 1080)
)

# Nearest neighbor search for pose

In [None]:
# Take entry 500 of pose_foreground_B and look up its nearest pose in pose_foreground_A.
# poseB[1] is the query passed to get_nearest_pose; poseB[0] is passed to load_frame
# by the next cell.
# NOTE(review): pose_foreground_A / pose_foreground_B are not assigned in any cell
# visible in this notebook - presumably leftovers from an earlier kernel session or
# names provided by a star import. TODO confirm, otherwise this cell fails on
# Restart & Run All.
poseB = pose_foreground_B[500]
poseA_nearest = get_nearest_pose(poseB[1], pose_foreground_A)

In [None]:
# Draw both stick figures on the frame loaded for poseB: poseB itself with colour
# (0, 0, 255) and its nearest pose from A with colour (255, 0, 0), then show the frame.
img = load_frame(video, poseB[0], [])
for pose, color in ((poseB[1], (0, 0, 255)), (poseA_nearest[1], (255, 0, 0))):
    visualize_pose_stick(img, pose, color)
imshow(img)

In [None]:
# Overlay each sampled B pose and its nearest A pose on the frame loaded for the
# B pose, for up to 100 randomly chosen entries of pose_foreground_B.
# NOTE(review): pose_foreground_A / pose_foreground_B are not assigned in any cell
# visible in this notebook - TODO confirm where they come from.
images = []
# random.sample raises ValueError when asked for more items than are available,
# so the sample size is clamped. The sample is unseeded and differs between runs.
num_samples = min(100, len(pose_foreground_B))
poseB_sample = random.sample(pose_foreground_B, num_samples)
for idx, poseB in enumerate(poseB_sample):
    poseA_nearest = get_nearest_pose(poseB[1], pose_foreground_A)
    img = load_frame(video, poseB[0], [])
    visualize_pose_stick(img, poseB[1], (0, 0, 255))
    visualize_pose_stick(img, poseA_nearest[1], (255, 0, 0))
    images.append(img)
    print(idx)

In [None]:
# Combine the overlay images collected above into one montage written to
# /app/result/NNpose.jpg.
# NOTE(review): 7680 and 10 are passed positionally; presumably the montage width in
# pixels and the number of images per row - confirm against create_montage_from_images.
create_montage_from_images(images, '/app/result/NNpose.jpg', 7680, 10)

# Replace foreground player with nearest neighbor (NN)

In [None]:
# Download the segment from sfid to efid (frame ids divided by video.fps, i.e.
# presumably seconds) to a local clip.
# NOTE(review): sfid / efid are not set in this cell; on a top-to-bottom run they hold
# the values left behind by the last `for (_, sfid, efid, _) in ...` loop executed
# above. The `_3` in the file name suggests interval index 3 was intended - TODO
# confirm and assign them explicitly.
video.download(segment=(sfid/video.fps, efid/video.fps), output_path='/app/tmp/JZ_foreground_3.mp4')

In [None]:
# Replace the foreground player in every frame of match_intervals_B and collect the
# edited frames in target_frames.
# source_type selects where the replacement pixels come from:
#   'openpose'  - frame returned by get_nearest_openpose(...) over fid2openpose_A
#   'densepose' - frame returned by get_nearest_densepose(...) over fid2densepose_A
#   'pix2pix'   - synthesized crop read from /app/data/image/HW_fake/
source_type = 'pix2pix'
foreground_player = 'JZ'

# Fail before any frame is loaded if the mode is misspelled.
if source_type not in ('openpose', 'densepose', 'pix2pix'):
    raise ValueError("Unknown source_type: {!r}".format(source_type))

target_frames = []
# _, sfid, efid, _ = match_intervals_B[3]
for (_, sfid, efid, _) in match_intervals_B:
    for target_fid in range(sfid, efid):
        print('target_fid:', target_fid)
        # Load data; frames without a densepose segmentation are skipped.
        if target_fid not in fid2densepose or 'segms' not in fid2densepose[target_fid]:
            continue
        densepose_entry = fid2densepose[target_fid]
        player_mask = mask_util.decode(densepose_entry['segms'])[..., 0]
        # player_bbox is only needed by the disabled inpainting step below.
        player_bbox = densepose_entry['boxes'][0]
        player_densepose = densepose_entry['keyps'][0]
        player_cropbox = densepose_entry['crop_box']

        # Frames without a foreground openpose entry are skipped as well.
        if target_fid not in fid2openpose_B:
            continue
        player_openpose = fid2openpose_B[target_fid]

        target_frame = load_frame(video, target_fid, [])

        # Inpaint the background (disabled; the later visualize_labels cell needs the
        # bg_fid / bg_frame assigned here)
#         bg_fid = find_nearby_bg_frame(target_fid, player_bbox, match_scene_cls, fid2maskrcnn)
#         print('bg_fid:', bg_fid)
#         if bg_fid is None:
#             continue
#         bg_frame = load_frame(video, bg_fid, [])
#         player_mask_dil = ndimage.binary_dilation((player_mask > 0), iterations=20)
#         target_frame[player_mask_dil == 1] = bg_frame[player_mask_dil == 1]

        # Stitch nearest player: every branch must set source_frame and source_mask.
        if source_type == 'openpose':
            (source_fid, source_pose) = get_nearest_openpose(player_openpose, fid2openpose_A)
            print('source_fid:', source_fid)
            source_frame = load_frame(video, source_fid, [])
            source_mask = mask_util.decode(fid2densepose[source_fid]['segms'])[..., 0]
        elif source_type == 'densepose':
            (source_fid, source_pose) = get_nearest_densepose(player_densepose, fid2densepose_A)
            print('source_fid:', source_fid)
            source_frame = load_frame(video, source_fid, [])
            source_mask = mask_util.decode(fid2densepose[source_fid]['segms'])[..., 0]
        elif source_type == 'pix2pix':
            source_path = '/app/data/image/HW_fake/densepose_{}_{}_{}_synthesized_image.jpg' \
                .format(video_id, target_fid, foreground_player)
            if not os.path.exists(source_path):
                continue
            source_frame_crop = cv2.imread(source_path)
            if source_frame_crop is None:
                # cv2.imread returns None for a file it cannot decode; skip the frame
                # just like a missing file.
                continue
            # Place the crop into an otherwise black full-size frame at rows
            # crop_box[1]:crop_box[3], columns crop_box[0]:crop_box[2].
            source_frame = np.zeros_like(target_frame)
            source_frame[player_cropbox[1]:player_cropbox[3], player_cropbox[0]:player_cropbox[2]] = source_frame_crop
            source_mask = player_mask

        # Paste the source pixels through source_mask. The paste used to go through
        # player_mask_dil, which only the 'pix2pix' branch assigned, so the other two
        # modes hit a NameError (or silently reused the mask of an earlier frame).
        # Optional dilation of the mask (disabled):
        #     source_mask = ndimage.binary_dilation((source_mask > 0), iterations=5)
        paste_region = source_mask == 1
        target_frame[paste_region] = source_frame[paste_region]

        # Add target posestick (disabled)
#         visualize_densepose_stick(target_frame, player_densepose, (0, 0, 255))
#         visualize_openpose_stick(target_frame, player_openpose, (0, 0, 255))

        target_frames.append(target_frame)

In [None]:
# Write the collected frames to an MJPG-encoded AVI using the video's fps and size.
videowriter = cv2.VideoWriter('/app/result/replace_foreground_pix2pix_long.avi', cv2.VideoWriter_fourcc('M','J','P','G'), video.fps, (video.width, video.height))
# cv2.VideoWriter does not raise when it cannot open the output; without this check
# every write() would be dropped silently.
if not videowriter.isOpened():
    videowriter.release()
    raise IOError('Could not open /app/result/replace_foreground_pix2pix_long.avi for writing')
try:
    for f in target_frames:
        videowriter.write(f)
finally:
    # Always finalize the file, even if writing a frame fails.
    videowriter.release()

In [None]:
# Pass bg_frame and the fid2maskrcnn entry for bg_fid to visualize_labels, then
# display bg_frame.
# NOTE(review): bg_frame and bg_fid are only assigned inside the commented-out
# "Inpaint the background" lines of the replacement loop above, so this cell raises
# NameError on a fresh kernel unless that code is re-enabled - TODO confirm intent.
visualize_labels(bg_frame, fid2maskrcnn[bg_fid])
imshow(bg_frame)

# Generate stick-figure training data from densepose keypoints

In [None]:
# Load the hit dictionary and the densepose results; each file is closed once it
# has been read. densepose_result is re-read from the same file that an earlier cell
# already loaded.
with open('/app/data/pkl/hit_dict.pkl', 'rb') as pkl_file:
    hit_dict_split = pickle.load(pkl_file)
with open('/app/data/pkl/densepose_result.pkl', 'rb') as pkl_file:
    densepose_result = pickle.load(pkl_file, encoding='latin1')

In [None]:
from detectron.utils.vis import vis_keypoints

# For every densepose record whose remapped image path exists on disk, draw the
# record's first keypoint entry on a white canvas of the video's size and write the
# crop_box region of that canvas to the record's "stick" path.
for idx, res in enumerate(densepose_result):
    if 'img_path' not in res:
        continue
    img_path = res['img_path'].replace('img_HW', 'HW/train_B').replace('/Projects/esper_haotian/esper', '')
    if not os.path.exists(img_path):
        continue
    # White uint8 canvas (same values as np.ones(...) * 255).
    image = np.full((video.height, video.width, 3), 255, dtype=np.uint8)
    # Builtin int: the np.int alias it replaces was removed in NumPy 1.24.
    image = vis_keypoints(image, res['keyps'][0].astype(int), kp_thresh=2, alpha=1)
#     imshow(image)
    stick_path = res['densepose_path'].replace('densepose', 'stick').replace('/Projects/esper_haotian/esper', '')
#     print(stick_path)
    crop_box = res['crop_box']
    cv2.imwrite(stick_path, image[crop_box[1] : crop_box[3], crop_box[0] : crop_box[2], :])
    # Progress report; only fires for records that were not skipped above.
    if idx % 100 == 0:
        print(idx)