# Query Poses

In [None]:
from esper.rekall_query import *
from esper.supercut import *
import cv2
import random

In [None]:
# Id of the single video analysed throughout this notebook.
video_id = 65
# First row matching that id; indexing an empty queryset raises IndexError.
video = Video.objects.filter(id=video_id)[0]
# The same id in list form, used by the `__in` lookup further down.
video_ids = [video_id]

# Find frames with people

In [None]:
# Report how many frames of the video carry the 'openpose:labeled' tag
# (the tag row is created on first use by get_or_create).
LABELED_TAG, _ = Tag.objects.get_or_create(name='openpose:labeled')
frames_with_pose = Frame.objects.filter(video_id=video_id, tags=LABELED_TAG)
# Count in the database; len() would load every matching Frame row into
# memory just to count them.
num_frame_with_pose = frames_with_pose.count()
print("{} frames contain poses out of {} frames ({:.02f}%)".format(num_frame_with_pose, video.num_frames,
                                                             100. * num_frame_with_pose/video.num_frames))

# Find frames with only two people

In [None]:
# Number of poses a frame must contain to be kept.
NUM_PERSON_IN_FRAME = 2

# Build an interval collection from every Pose row of the selected videos:
#   * each pose is annotated with min_frame == max_frame == its frame number,
#     i.e. it becomes a one-frame interval;
#   * the payload of each interval is a one-element list holding the value
#     read through django_accessor(obj, 'id');
#   * coalesce(payload_merge_op=payload_plus) merges intervals and combines
#     their payloads (presumably list concatenation, so the payload length is
#     the number of poses on the frame - confirm against rekall);
#   * the final filter keeps intervals whose payload satisfies
#     length_exactly(NUM_PERSON_IN_FRAME).
two_people_intrvlcol_all = VideoIntervalCollection.from_django_qs(
    Pose.objects.filter(frame__video__id__in=video_ids) \
    .annotate(video_id=F('frame__video_id')) \
    .annotate(min_frame=F('frame__number')) \
    .annotate(max_frame=F('frame__number')),
    with_payload=lambda obj: [VideoIntervalCollection.django_accessor(obj, 'id')] 
    ).coalesce(payload_merge_op=payload_plus).filter(payload_satisfies(length_exactly(NUM_PERSON_IN_FRAME)))


In [None]:
# Candidate segments: dilate by 5, coalesce (payloads merged with
# payload_plus), dilate back by -5, then keep only intervals passing
# filter_length(min_length=25).
# Presumably the dilate/coalesce/erode sequence bridges gaps of a few frames
# between neighbouring intervals - confirm against rekall's `dilate`.
two_people_intrvlcol = \
    two_people_intrvlcol_all \
        .dilate(5) \
        .coalesce(payload_merge_op=payload_plus) \
        .dilate(-5) \
        .filter_length(min_length=25)

# Same sequence with a window of 10 and no length filter.
# NOTE: this rebinds `two_people_intrvlcol_all` to the processed result, so
# re-running this cell starts from the already processed collection.
two_people_intrvlcol_all = \
    two_people_intrvlcol_all \
        .dilate(10) \
        .coalesce() \
        .dilate(-10) 
count_intervals(two_people_intrvlcol_all)

In [None]:
# Print the share of the video's frames covered by `match_intrvlcol`.
# NOTE(review): `match_intrvlcol` is not assigned anywhere in the visible
# notebook; it may be an earlier name of `two_people_intrvlcol` - confirm
# before running, otherwise this cell raises NameError.
num_frames_in_match = count_duration(match_intrvlcol)
print("{} frames are in match out of {} frames ({:.02f}%)".format(num_frames_in_match, video.num_frames,
                                                           100. * num_frames_in_match/video.num_frames))

In [None]:
# Browse the two-person intervals in the Esper widget.
esper_widget(intrvlcol2result(two_people_intrvlcol, flat=True), use_jupyter_keybindings=True)

## Create montage 

In [None]:
# Interval list of this video taken from `match_intrvlcol`, rendered as a
# 10-column montage image.
# NOTE(review): `match_intrvlcol` is not assigned in the visible notebook, and
# `intrvlcol2list` is given an interval *list* here whereas other cells pass it
# a collection - confirm both.
match_intrvllist = match_intrvlcol.get_intervallist(video_id)
create_montage(intrvlcol2list(match_intrvllist), out_path='/app/result/pose_montage.jpg',
                  width=2160, num_cols=10,
                  )

In [None]:
# One (video_id, start, end, -1) tuple per interval of `match_intrvllist`;
# the meaning of the trailing -1 is defined by the stitching helpers, not here.
intervals = [(video_id, i.start, i.end, -1) for i in match_intrvllist.get_intervals()]

In [None]:
# Stitch every interval into a single video file.
stitch_video_temporal(intervals, '/app/result/pose_montage.mp4')

In [None]:
# Hand-picked slice of 25 intervals (positions 30..54) for the grid video.
intervals_selected = intervals[30:55]

In [None]:
# Intermediate outputs: video-only grid and the mixed audio track.
video_path = '/app/result/pose_montage.avi'
audio_path = '/app/result/pose_montage.wav'

In [None]:
# Grid video with 5 columns at a total width of 1920.
# target_height evaluates to 216, i.e. 16:9 tiles of width 1920 // 5 = 384.
stitch_video_spatial(intervals_selected, out_path=video_path, align=False, 
                     width=1920, num_cols=5, target_height = 1920 // 5 * 9 // 16)

In [None]:
# Mix the audio of the selected intervals into one file.
mix_audio(intervals_selected, out_path=audio_path, decrease_volume=7, align=False, dilation=0)

In [None]:
# Combine the grid video and the mixed audio into the final mp4.
concat_video_audio(video_path, audio_path, '/app/result/pose_montage-mix.mp4')

# Segment Match Scene

In [None]:
# Load the per-frame colour histograms of the video: one row of 3*16 integers
# per frame (row index is later used as the frame number).
with open('/app/data/histogram/{:07d}.bin'.format(video_id), 'rb') as f:
    # `np.int` was only an alias of the builtin `int` and was removed in
    # NumPy 1.24; passing `int` selects the same dtype on every NumPy version.
    feature_all = np.frombuffer(f.read(), dtype=int).reshape((-1, 3*16))

In [None]:
# Keep only intervals passing filter_length(min_length=100).
two_people_intrvlcol_long = two_people_intrvlcol.filter_length(min_length=100)

In [None]:
# Flatten to a list sorted by duration; entries are later unpacked as
# (vid, sfid, efid, duration).
match_intervals = intrvlcol2list(two_people_intrvlcol_long, with_duration=True, sort_by_duration=True)

In [None]:
# Show the candidates so positive examples can be picked by hand
# (the picked indices are pasted into `selected` below).
manual_select_candidates(interval2result(match_intervals))

In [None]:
# Maximum number of frames drawn per class for training.
NUM_SAMPLE_TRAIN = 1000

In [None]:
# positive samples
# Indices into `match_intervals` of the intervals picked by hand as true match scenes.
selected = [0, 2, 3, 7, 6, 5, 4, 8, 9, 10, 11, 15, 14, 13, 12, 16, 17, 18, 19, 23, 22, 21, 20, 24, 25, 26, 27, 31, 29, 32, 33, 34, 35, 39, 38, 36, 40, 41, 43, 47, 45, 49, 48]
positive_intervals = [match_intervals[idx] for idx in selected]
POSITIVE_STRIDE = 5
# Alternative (disabled): sample every POSITIVE_STRIDE-th frame and skip 5
# frames at both ends of each interval:
# positive_fids_all = [fid for (vid, sfid, efid, duration) in positive_intervals
#                  for fid in range(sfid+5, efid-5, POSITIVE_STRIDE)]
# Every frame in [sfid, efid) of every selected interval is a candidate.
positive_fids_all = [fid for (vid, sfid, efid, duration) in positive_intervals
                     for fid in range(sfid, efid)]
print("Total positive samples ", len(positive_fids_all))
random.shuffle(positive_fids_all)
positive_fids = positive_fids_all[:NUM_SAMPLE_TRAIN]
positive_feature = [feature_all[fid] for fid in positive_fids]
# One label per sampled frame: fewer than NUM_SAMPLE_TRAIN frames may be
# available, and features and labels must stay the same length for model.fit.
positive_label = [1] * len(positive_fids)

In [None]:
# Wrap every sampled positive frame as a one-frame interval (fid, fid, 0) and
# browse them in the widget.
positive_frames = VideoIntervalCollection({video_id: IntervalList([(fid, fid, 0) for fid in positive_fids])})
esper_widget(intrvlcol2result(positive_frames, flat=True))

In [None]:
# Montage image of the sampled positive frames, 35 per row.
create_montage([(video_id, fid, fid) for fid in positive_fids], out_path='/app/result/positive_montage.jpg',
                  width=2160, num_cols=35)

In [None]:
# negative samples
# Negatives are all frames of the video that lie outside every interval of the
# smoothed two-person collection (interval ends are treated as inclusive here).
# (A disabled earlier variant subtracted the collection from the full video
# range with `minus` and sampled every 20th frame.)
# A set makes the membership test below O(1) per frame instead of scanning a list.
positive_fids_large = {fid for interval in two_people_intrvlcol_all.get_intervallist(video_id).get_intervals()
                       for fid in range(interval.start, interval.end + 1)}
negative_fids_all = [fid for fid in range(video.num_frames)
                     if fid not in positive_fids_large]
print("Total negative samples ", len(negative_fids_all))
random.shuffle(negative_fids_all)
negative_fids = negative_fids_all[:NUM_SAMPLE_TRAIN]
negative_feature = [feature_all[fid] for fid in negative_fids]
# One label per sampled frame so features and labels always have equal length.
negative_label = [0] * len(negative_fids)

In [None]:
# Wrap every sampled negative frame as a one-frame interval (fid, fid, 0) and
# browse them in the widget.
negative_frames = VideoIntervalCollection({video_id: IntervalList([(fid, fid, 0) for fid in negative_fids])})
esper_widget(intrvlcol2result(negative_frames, flat=True))

In [None]:
# Poses detected on the sampled negative frames.
# NOTE: this rebinds `negative_frames` from an interval collection to a
# queryset. The bare len(...) is not the last expression of the cell, so its
# value is not displayed.
negative_frames = Pose.objects.filter(frame__video_id=video_id, frame__number__in=negative_fids)
len(negative_frames)
esper_widget(qs_to_result(negative_frames, stride=1, limit=1000))

In [None]:
# Montage image of the sampled negative frames, 35 per row.
create_montage([(video_id, fid, fid) for fid in negative_fids], out_path='/app/result/negative_montage.jpg',
                  width=2160, num_cols=35)

In [None]:
# train model
# k-nearest-neighbour classifier over the per-frame histogram features
# (label 1 = sampled positive frame, 0 = sampled negative frame).
from sklearn.neighbors import KNeighborsClassifier

model = KNeighborsClassifier(n_neighbors=100)

# Train the model using the training sets
# (list concatenation: positives first, then negatives, labels in the same order)
model.fit(positive_feature + negative_feature, positive_label + negative_label)

In [None]:
#Predict Output
# One predicted label per row of feature_all; `predicted_label` is read as a
# global by filter_by_histogram below.
predicted_label = model.predict(feature_all) 

In [None]:
# Minimum fraction of frames that must be predicted positive.
MATCH_THRESH = 0.9
def filter_by_histogram(interval):
    """Tell whether enough frames of `interval` were classified as match frames.

    Looks up the global `predicted_label` for every frame in
    [interval.start, interval.end) and compares the fraction of positive
    labels with MATCH_THRESH (strictly greater).
    """
    first, last = interval.start, interval.end
    span = last - first
    votes = np.sum([predicted_label[frame] for frame in range(first, last)])
    match_ratio = 1. * votes / span
    return match_ratio > MATCH_THRESH
# Keep the two-person intervals accepted by filter_by_histogram and count them.
match_scene_intrvlcol = two_people_intrvlcol.filter(filter_by_histogram)
count_intervals(match_scene_intrvlcol)

In [None]:
# Browse the accepted match-scene intervals in the widget.
esper_widget(intrvlcol2result(match_scene_intrvlcol, flat=True), use_jupyter_keybindings=True)

In [None]:
# Montage image of the accepted intervals, 10 per row.
create_montage(intrvlcol2list(match_scene_intrvlcol, with_duration=False), out_path='/app/result/match_scene_montage.jpg',
                  width=2160, num_cols=10)

In [None]:
# Persist the match-scene collection so the sections below can reload it
# without recomputing. The context manager closes (and flushes) the file.
with open('/app/result/match_scene_intrvlcol.pkl', 'wb') as f:
    pickle.dump(match_scene_intrvlcol, f)

# Segment Point

## Detect sound peak

In [None]:
# Reload the collection written by the previous section; the context manager
# closes the file handle. NOTE: pickle executes code on load - only read files
# produced by this notebook.
with open('/app/result/match_scene_intrvlcol.pkl', 'rb') as f:
    match_scene_intrvlcol = pickle.load(f)

In [None]:
# Take the first interval of the duration-sorted list and extract its audio;
# frame numbers are converted to seconds by dividing by video.fps.
# NOTE: this rebinds `audio_path` (set earlier to the montage wav path) to
# whatever extract_audio returns.
_, sfid, efid, d = intrvlcol2list(match_scene_intrvlcol, sort_by_duration=True)[0]
audio_path = video.extract_audio(segment=(1.*sfid/video.fps, 1.*efid/video.fps))

In [None]:
# analyze sound
import matplotlib.pyplot as plt
import wave
import sys

# Read the whole clip; the context manager closes the WAV file afterwards.
with wave.open(audio_path, 'r') as spf:
    # Extract raw audio from the WAV file
    signal = spf.readframes(-1)
    fs = spf.getframerate()
    num_channels = spf.getnchannels()

# Decode the raw bytes as 16-bit samples (assumes 16-bit PCM - TODO confirm
# against spf.getsampwidth()). np.frombuffer replaces the deprecated binary
# mode of np.fromstring, and np.int16 replaces the removed 'Int16' alias.
signal = np.frombuffer(signal, dtype=np.int16)
print(fs)

# If stereo: only a warning is printed. The samples of both channels are then
# interleaved in `signal`, so the time axis below is only valid for mono input.
if num_channels == 2:
    print('Just mono files')


# Time axis in seconds: one point per sample.
Time = np.linspace(0, len(signal)/fs, num=len(signal))

plt.figure(1)
plt.title('Signal Wave...')
plt.plot(Time, signal)
plt.show()

## Detect foot motion

In [None]:
# Duration-sorted list of match-scene intervals; the entry at position 9 is
# the hand-chosen example used in the cells below.
match_scene_intervals = intrvlcol2list(match_scene_intrvlcol, sort_by_duration=True)
_, sfid, efid, d = match_scene_intervals[9]

In [None]:
# Show just that one interval in the widget.
esper_widget(interval2result(match_scene_intervals[9:10]), disable_caption=True)

In [None]:
def group_pose(interval):
    """Split the two-person frames of an interval into two pose tracks.

    Args:
        interval: sequence whose items 1 and 2 are the first and last frame
            number (both inclusive) to inspect. The video is taken from the
            global `video_id`.

    Returns:
        (foreground_pose, background_pose): two dicts keyed by frame number.
        Only frames with exactly two poses appear in them. On each such frame
        the pose whose Neck keypoint has the larger (or equal) component at
        index 1 is stored as foreground, the other as background. Presumably
        index 1 is the image y coordinate, so "foreground" is the person lower
        in the picture - confirm against Pose._format_keypoints.
    """
    from collections import defaultdict

    sfid, efid = interval[1:3]

    # select_related loads each pose's frame in the same query, so reading
    # pose.frame.number below does not issue one extra query per pose.
    poses = (Pose.objects
             .filter(frame__video_id=video_id)
             .filter(frame__number__gte=sfid)
             .filter(frame__number__lte=efid)
             .select_related('frame'))

    # Bucket the poses by frame number.
    fid2pose = defaultdict(list)
    for pose in poses:
        fid = pose.frame.number
        fid2pose[fid].append(pose)
        if len(fid2pose[fid]) > 2:
            # More than two detections on this frame: report the frame number.
            print(fid)

    foreground_pose = {}
    background_pose = {}
    for fid in sorted(fid2pose):
        pose_list = fid2pose[fid]
        if len(pose_list) != 2:
            continue
        poseA, poseB = pose_list
        poseA_neck = poseA._format_keypoints()[Pose.Neck]
        poseB_neck = poseB._format_keypoints()[Pose.Neck]
        if poseA_neck[1] >= poseB_neck[1]:
            foreground_pose[fid] = poseA
            background_pose[fid] = poseB
        else:
            foreground_pose[fid] = poseB
            background_pose[fid] = poseA
    return foreground_pose, background_pose

In [None]:
# First and last frame number of the example interval (position 9).
sfid, efid = match_scene_intervals[9][1:3]
# Disabled: download the corresponding video segment for offline inspection.
# video.download(segment=(sfid/video.fps, efid/video.fps), output_path='/app/tmp/point.mp4')

In [None]:
# All poses of the video on frames sfid..efid (both inclusive).
# The chain is parenthesised instead of ending in a dangling backslash, and
# select_related loads each pose's frame in the same query so that reading
# pose.frame.number afterwards does not issue one query per pose.
poses = (Pose.objects
         .filter(frame__video_id=video_id)
         .filter(frame__number__gte=sfid)
         .filter(frame__number__lte=efid)
         .select_related('frame'))

In [None]:
# Bucket the fetched poses by the number of the frame they belong to.
fid2pose = {}
for pose in poses:
    fid = pose.frame.number
    bucket = fid2pose.setdefault(fid, [])
    bucket.append(pose)
    if len(bucket) > 2:
        # More than two detections on this frame: report the frame number.
        print(fid)

In [None]:
# For every frame holding exactly two poses, store the pose whose Neck
# keypoint has the larger (or equal) value at index 1 as foreground and the
# other one as background. Frames with any other number of poses are skipped.
foreground_pose = {}
background_pose = {}
for fid in sorted(fid2pose):
    pose_list = fid2pose[fid]
    if len(pose_list) != 2:
        continue
    poseA, poseB = pose_list
    poseA_neck = poseA._format_keypoints()[Pose.Neck]
    poseB_neck = poseB._format_keypoints()[Pose.Neck]
    a_in_front = poseA_neck[1] >= poseB_neck[1]
    foreground_pose[fid], background_pose[fid] = (poseA, poseB) if a_in_front else (poseB, poseA)

In [None]:
# Display names of the joints to plot; the matching keypoint indices are the
# Pose attributes of the same names, in the same order.
joints_to_plot_name = ['Neck', 'LShoulder', 'LElbow', 'LWrist', 'LHip', 'LKnee', 'LAnkle']
joints_to_plot = [getattr(Pose, joint_name) for joint_name in joints_to_plot_name]
def get_trajectory(fid2pose):
    """Collect per-joint coordinate tracks from a {frame number: pose} mapping.

    Frames are visited in ascending order. For every joint in the global
    `joints_to_plot`, the component at index 0 (X) and at index 1 (Y) of its
    keypoint is recorded only when it lies strictly between 0 and 1; the frame
    number is recorded alongside, separately for X and Y.

    Returns:
        (xtick_X, xtick_Y, trajectory_X, trajectory_Y): four dicts keyed by
        joint; the xtick lists hold frame numbers, the trajectory lists the
        matching coordinate values.
    """
    trajectory_X, trajectory_Y, xtick_X, xtick_Y = (
        {joint: [] for joint in joints_to_plot} for _ in range(4)
    )
    for fid in sorted(fid2pose):
        keypoints = fid2pose[fid]._format_keypoints()
        for joint in joints_to_plot:
            # Same handling for both axes: X (index 0) first, then Y (index 1).
            for axis, ticks, track in ((0, xtick_X, trajectory_X),
                                       (1, xtick_Y, trajectory_Y)):
                coord = keypoints[joint][axis]
                if 0 < coord < 1:
                    track[joint].append(coord)
                    ticks[joint].append(fid)
    return xtick_X, xtick_Y, trajectory_X, trajectory_Y

In [None]:
# One figure per pose track (foreground first, then background): for every
# joint, X is drawn as a solid line and Y as a dashed line against the frame
# number. The loop replaces two copies of the same plotting code, and zip()
# avoids rebinding the builtin `id` at notebook scope.
for pose_by_fid in (foreground_pose, background_pose):
    plt.figure()
    xtick_X, xtick_Y, trajectory_X, trajectory_Y = get_trajectory(pose_by_fid)
    for joint, joint_name in zip(joints_to_plot, joints_to_plot_name):
        plt.plot(xtick_X[joint], trajectory_X[joint], label=joint_name + 'X')
        plt.plot(xtick_Y[joint], trajectory_Y[joint], '--', label=joint_name + 'Y')
    legend = plt.legend(loc='lower right')

# Separate foreground and background

In [None]:
def collect_histogram(pose_list):
    """Compute a torso colour histogram for every (frame number, pose) pair.

    Args:
        pose_list: sequence of (fid, pose) tuples; the frame is loaded from
            the global `video`.

    Returns:
        A list with one length-48 array per entry of `pose_list`: 16 bins for
        each of the 3 image channels, divided by the number of pixels used.
        An entry whose torso region contains no pixel yields an all-zero
        histogram.
    """
    def get_histogram(i):
        fid, pose = pose_list[i]
        img = load_frame(video, fid, [])
        H, W = img.shape[:2]
        keypoints = pose._format_keypoints()
        # Torso quadrilateral: left shoulder, left hip, right hip, right shoulder.
        poly_vertices = [keypoints[Pose.LShoulder][:2], keypoints[Pose.LHip][:2],
                         keypoints[Pose.RHip][:2], keypoints[Pose.RShoulder][:2]]
        # Keypoint coordinates are scaled by the image size to get pixel positions.
        poly_vertices = np.array([(int(pt[0] * W), int(pt[1] * H)) for pt in poly_vertices],
                                 dtype=np.int32)
        # Rasterise the torso into the mask. Previously the polygon was drawn
        # onto the image instead, leaving the mask empty, so no pixel was
        # selected and the histogram was divided by zero.
        mask = np.zeros((H, W), dtype=np.uint8)
        cv2.fillConvexPoly(mask, poly_vertices, 1)

        cloth = img[mask > 0]
        # calcHist needs an image, so the selected pixels are cut down to the
        # largest square number and reshaped to width x width.
        width = int(np.sqrt(cloth.shape[0]))
        if width == 0:
            return np.zeros(48)
        cloth = cloth[:width * width].reshape((width, width, 3))
        hist_channel = [cv2.calcHist([cloth], [channel], None, [16], [0, 256])
                        for channel in range(3)]
        hist = np.vstack(hist_channel).reshape(48)
        return hist / (1. * width * width)

    # Callers concatenate the result and pass it to the classifier, so return
    # one histogram per entry (a parallel `par_for` over the same indices was
    # the disabled earlier form).
    return [get_histogram(i) for i in range(len(pose_list))]

In [None]:
# Try collect_histogram on the foreground track, converted to the
# (fid, pose) pair list the function indexes into.
fore = [(fid, pose) for fid, pose in foreground_pose.items()]
collect_histogram(fore)

In [None]:
# collect_histogram indexes its argument by position and unpacks (fid, pose)
# pairs, so the {fid: pose} dicts are converted to pair lists first.
hist_foreground = collect_histogram(list(foreground_pose.items()))
hist_background = collect_histogram(list(background_pose.items()))

In [None]:
# train model
# NOTE: this rebinds `model`, replacing the scene classifier trained earlier.
from sklearn.neighbors import KNeighborsClassifier

model = KNeighborsClassifier(n_neighbors=100)

# Train the model using the training sets
# Label 1 = foreground histograms, 0 = background histograms. The `+` assumes
# both histogram collections are Python lists (concatenation) - confirm.
model.fit(hist_foreground + hist_background, [1]*len(hist_foreground) + [0]*len(hist_background))

In [None]:
# Sort every match-scene interval into one of two lists depending on whether
# the classifier labels the majority of its sampled foreground torsos as 1.
foreground_match_scene_intervals = []
background_match_scene_intervals = []

for interval in match_scene_intervals:
    foreground_pose, background_pose = group_pose(interval)
    pose_list = list(foreground_pose.items())
    if not pose_list:
        # No frame with exactly two poses: nothing to classify for this interval.
        print("Skipped (no two-person frame)", interval)
        continue
    # Sample at most 50 poses; random.sample raises ValueError when asked for
    # more items than the population holds.
    pose_list_sample = random.sample(pose_list, min(50, len(pose_list)))
    hist_fore = collect_histogram(pose_list_sample)

    # Kept under its own name so the global `predicted_label` read by
    # filter_by_histogram is not overwritten.
    fore_votes = model.predict(hist_fore)
    foreground_ratio = 1. * np.sum(fore_votes) / len(fore_votes)
    if foreground_ratio > 0.5:
        foreground_match_scene_intervals.append(interval)
    else:
        background_match_scene_intervals.append(interval)
    print("Foreground", foreground_ratio)
        

In [None]:
# Montage image of the intervals classified as foreground, 6 per row.
create_montage(foreground_match_scene_intervals, out_path='/app/result/foreground_montage.jpg',
                  width=2160, num_cols=6)

In [None]:
# Montage image of the intervals classified as background, 6 per row.
create_montage(background_match_scene_intervals, out_path='/app/result/background_montage.jpg',
                  width=2160, num_cols=6)