In [1]:
import os
import json
import cv2 as cv
from datetime import datetime, timezone
import numpy as np
import pickle as pkl
from scipy import signal
from scipy import stats, spatial, ndimage

# input block

In [2]:
# input_folder = "/Users/evanpan/Documents/Datasets/Ribhav_processed_dataset/"
# Root folder of the processed dataset. The next cell joins the sub-folders
# "shots", "video", "JaliTranscript", "L2CSNet" and "pose" onto this path.
input_folder = "F:/MASC/Ribhav_processed_dataset/"
# Title of the video (no extension). Used verbatim for the video/shots/gaze/pose
# files and with spaces replaced by "_" for the JaliTranscript folder.
input_file = "Madelaine Petsch audition for The Prom"
# Index into the list of shots lasting at least 5 seconds (valid_shots) and the
# first number in the "shot_{}_{}" transcript/audio file names.
shot_id = 1 
# NOTE(review): not referenced by any of the visible cells (speaker numbers 1 and 2
# are hard-coded in the paths) - confirm whether a later cell uses it.
speaker_id = 1

In [3]:

# ---------------------------------------------------------------------------
# Derived paths: everything here is built from input_folder / input_file /
# shot_id defined in the configuration cell.
# ---------------------------------------------------------------------------
input_file_no_space = "_".join(input_file.split(" "))
shot_file_name = os.path.join(input_folder, "shots", input_file, "shot_cuts.json")
video_path = os.path.join(input_folder, "video", input_file + ".mp4")

# All JaliTranscript artefacts of one video share a folder; per-speaker files
# are named "shot_<shot_id>_<speaker>.<ext>".
jali_dir = os.path.join(input_folder, "JaliTranscript", input_file_no_space)
audio_1_path = os.path.join(jali_dir, "shot_{}_{}.wav".format(shot_id, 1))
audio_2_path = os.path.join(jali_dir, "shot_{}_{}.wav".format(shot_id, 2))
script_1_path = os.path.join(jali_dir, "shot_{}_{}.txt".format(shot_id, 1))
script_2_path = os.path.join(jali_dir, "shot_{}_{}.txt".format(shot_id, 2))
praatoutput_1_path = os.path.join(jali_dir, "shot_{}_{}_PraatOutput.txt".format(shot_id, 1))
praatoutput_2_path = os.path.join(jali_dir, "shot_{}_{}_PraatOutput.txt".format(shot_id, 2))
gaze_direction_json_path = os.path.join(input_folder, "L2CSNet", input_file + ".json")
head_direction_json_path = os.path.join(input_folder, "pose", input_file + ".pkl")
# out_path = os.path.join(*["/Users/evanpan/Documents/GitHub/Gaze_project/data/look_at_points", "video_annotation.json"])
out_path = ".out.json"

# ---------------------------------------------------------------------------
# Frame rate of the source video (needed to convert shot times to frame indices).
# ---------------------------------------------------------------------------
vid = cv.VideoCapture(video_path)
fps = vid.get(cv.CAP_PROP_FPS)
if not fps > 0:
    # A missing/unreadable video reports fps == 0, which would silently turn
    # every frame range below into an empty slice. Fail loudly instead.
    raise RuntimeError("Could not read a valid frame rate from video: {}".format(video_path))

# ---------------------------------------------------------------------------
# Load the shot boundaries and keep only shots long enough to be useful.
# ---------------------------------------------------------------------------
MIN_SHOT_DURATION_SEC = 5
TIMECODE_FORMAT = "%H:%M:%S.%f"

with open(shot_file_name) as shot_file:  # close the handle deterministically
    shots = json.load(shot_file)["shots"]

# Timecodes are parsed as UTC datetimes; subtracting the timestamp of
# "00:00:00.0" (t0) converts them to seconds from the start of the video.
t0 = datetime.strptime("00:00:00.0", TIMECODE_FORMAT).replace(tzinfo=timezone.utc).timestamp()
valid_shots = []
for shot in shots:
    start = datetime.strptime(shot[0], TIMECODE_FORMAT).replace(tzinfo=timezone.utc).timestamp()
    end = datetime.strptime(shot[1], TIMECODE_FORMAT).replace(tzinfo=timezone.utc).timestamp()
    if (end - start) >= MIN_SHOT_DURATION_SEC:
        valid_shots.append([start - t0, end - t0])

# shot_id indexes the *filtered* list, not the raw list of cuts.
shot_used_range = valid_shots[shot_id]
range_low = int(np.round(shot_used_range[0] * fps))
range_high = int(np.round(shot_used_range[1] * fps))
# One timestamp (in seconds of video time) per frame of the selected shot.
ts = np.arange(0, range_high - range_low) / fps + shot_used_range[0]

### Helper functions (custom position-to-angle conversion, used instead of scipy's rotation package)

In [4]:
def rotation_angles_frome_positions(arr):
    """
    Convert position vectors into (azimuth, elevation) angles in degrees.

    Each vector is normalised to unit length and the angles are taken as the
    arcsine of its x and y components respectively:
        azimuth:   asin(x / |v|)  -> +right, -left
        elevation: asin(y / |v|)  -> +up, -down
    The input vectors are assumed to be expressed in world coordinates and
    the angles are measured about the origin.

    :param arr: array-like of shape (3,), (N, 3) or (..., 3); lists and
        tuples are accepted as well as numpy arrays. The input is not modified.
    :return: numpy array with the last axis reduced to 2, i.e. shape (2,),
        (N, 2) or (..., 2), holding (azimuth, elevation) in degrees.
        Zero-length vectors cannot be normalised and yield NaN.
    """
    vectors = np.asarray(arr)
    # Normalise along the last axis so single vectors and batches share one path.
    norms = np.linalg.norm(vectors, axis=-1, keepdims=True)
    unit_xy = (vectors / norms)[..., :2]
    # Guard arcsin against ratios that drift marginally outside [-1, 1]
    # through floating-point rounding (NaN inputs stay NaN).
    unit_xy = np.clip(unit_xy, -1.0, 1.0)
    return np.degrees(np.arcsin(unit_xy))

In [5]:
# head data
# SECURITY NOTE: unpickling can execute arbitrary code; only load pose/gaze
# files that were produced by a trusted pipeline.
with open(head_direction_json_path, "rb") as head_file:
    all_head_data = pkl.load(head_file)
head_angle_data = all_head_data["HEAD"]
head_rotmat_per_frame = head_angle_data["ROTMAT"]
head_euler_per_frame = head_angle_data["EULER"]
# Rotate a point straight ahead on the +z axis by each frame's head rotation,
# then read the head direction off as (azimuth, elevation) in degrees.
neutral_position = np.array([0, 0, 100])
head_angle_per_frame = np.array([
    rotation_angles_frome_positions(rotmat @ neutral_position)
    for rotmat in head_rotmat_per_frame
])[range_low:range_high]  # keep only the frames of the selected shot

# gaze data
# NOTE(review): this path ends in ".json" but the file is read with pickle -
# confirm the on-disk format of the L2CSNet output.
with open(gaze_direction_json_path, "rb") as f:
    all_gaze_data = pkl.load(f)
gaze_angle_data = all_gaze_data["RAW_GAZE"]
gaze_angle_per_frame = gaze_angle_data["EULER"][range_low:range_high]
gaze_rotmat_per_frame = gaze_angle_data["ROTMAT"][range_low:range_high]
# Blink annotations are stored alongside the head data, not the gaze data.
blinks = all_head_data["BLINKS"][range_low:range_high]


In [None]:
# Split the head motion into a slow component and a fast residual: a wide
# Gaussian low-pass gives the slow trend, and subtracting it from the raw
# signal leaves the quick "gesture" movements.
# Sigmas are expressed in samples (video frames).
GESTURE_BASELINE_SIGMA = 10   # width of the low-pass that defines the slow trend
EXPORT_SMOOTHING_SIGMA = 3    # light smoothing applied to everything that is exported

# Column 0 is azimuth, column 1 is elevation (see rotation_angles_frome_positions).
gesture_only_head_motion = [
    head_angle_per_frame[:, axis]
    - ndimage.gaussian_filter1d(head_angle_per_frame[:, axis], sigma=GESTURE_BASELINE_SIGMA)
    for axis in range(2)
]

# export them to a json file so I can see what they look like in maya
out_json = {
    "ts": ts.tolist(),
    "no_gaze_head": [
        ndimage.gaussian_filter1d(gesture_only_head_motion[0], sigma=EXPORT_SMOOTHING_SIGMA).tolist(),
        ndimage.gaussian_filter1d(gesture_only_head_motion[1], sigma=EXPORT_SMOOTHING_SIGMA).tolist(),
    ],
    "all_head": [
        ndimage.gaussian_filter1d(head_angle_per_frame[:, 0], sigma=EXPORT_SMOOTHING_SIGMA).tolist(),
        ndimage.gaussian_filter1d(head_angle_per_frame[:, 1], sigma=EXPORT_SMOOTHING_SIGMA).tolist(),
    ],
}

# Use a context manager so the file is flushed and closed before anything
# else (e.g. Maya) tries to read it.
with open(out_path, "w") as out_file:
    json.dump(out_json, out_file)
out_path