In [1]:
import numpy as np
import re
import pandas as pd

In [2]:
def all_remove(xlist, remove):
    """Return a new list with every item equal to ``remove`` filtered out.

    Parameters
    ----------
    xlist : iterable
        Items to filter; the input itself is not modified.
    remove : object
        Items for which ``item != remove`` is false are dropped.

    Returns
    -------
    list
        The surviving items, in their original order.
    """
    kept = []
    for item in xlist:
        if item != remove:
            kept.append(item)
    return kept

In [3]:
def rle(sequence):
    """Run-length encode a 1-D sequence.

    Parameters
    ----------
    sequence : array-like
        One-dimensional sequence of values. Lists and tuples are accepted
        and converted with ``np.asarray``.

    Returns
    -------
    values : numpy.ndarray
        The value of each run of consecutive equal elements, in order.
    lengths : numpy.ndarray
        The number of elements in each run; ``lengths.sum() == len(sequence)``.
        Both arrays are empty when ``sequence`` is empty.
    """
    sequence = np.asarray(sequence)
    if sequence.size == 0:
        # Nothing to encode; indexing position 0 below would raise IndexError.
        return sequence, np.zeros(0, dtype=np.intp)

    # True at every position where a new run starts, plus a sentinel True one
    # past the end so that consecutive differences yield the run lengths.
    run_boundaries = np.concatenate(([True], sequence[1:] != sequence[:-1], [True]))
    comp_seq_index, = run_boundaries.nonzero()
    return sequence[comp_seq_index[:-1]], np.ediff1d(comp_seq_index)

In [4]:
# Read the annotation text file into a list of raw lines (newlines kept).
filepath = "../video_stimuli/eyeclip_annotation.txt"
with open(filepath, "r", encoding="utf-8") as f:
    line = list(f)

# Drop the lines that consist of a bare newline only. Lines holding other
# whitespace, such as ' \n', are not equal to "\n" and therefore survive.
line_delete = all_remove(xlist=line, remove="\n")
print(len(line_delete))

283


In [5]:
# Positions of the ' \n' lines; the records of consecutive clips lie between them.
index = np.flatnonzero([entry == ' \n' for entry in line_delete])
# Add sentinels so the first and last records are delimited too:
# -1 in front of the first line and len(line_delete) behind the last one.
index = np.concatenate(([-1], index, [len(line_delete)]))
# One list of lines per clip, separator lines excluded.
clips_data = [line_delete[lo + 1:hi] for lo, hi in zip(index[:-1], index[1:])]

In [6]:
obj_pos = np.load("../preprocessed_data/obj_pos_upsample.npz")["data"]
# 1 where more than one entry over axes (0, 1) is non-NaN, else 0.
# NOTE(review): assumes obj_pos is 3-D so that the result is 1-D along the
# last axis (the resampled time axis, presumably) - confirm.
obj_pos_nonnan = 1*(np.sum(~np.isnan(obj_pos), axis=(0,1)) > 1)
value, length = rle(obj_pos_nonnan)
csl = np.cumsum(length)   # exclusive end index of every run
run_start = csl - length  # inclusive start index of every run

# Keep only the runs flagged valid (value == 1). Picking every other run by
# position is only correct when the data starts with a valid run and ends with
# an invalid one; selecting by value gives the same result in that case and
# keeps begin/end aligned in all other cases.
is_valid_run = value == 1
begin_resampled_indices = run_start[is_valid_run]
end_resampled_indices = csl[is_valid_run]

In [7]:
# Number of clip records found in the annotation file
# (not referenced again in the cells visible here).
num_clip = len(clips_data)

In [8]:
# Attribute keys searched for as "<key>=<digit>" in each annotation entry;
# they also become the trailing columns of the output table.
object_parts = [
    "subj",
    "obj",
    "eye",
    "mouth",
    "nose",
    "ear",
    "face",
    "hand",
]

# Leading columns of the output table: clip-level fields first,
# then the per-object name and id.
labels = [
    "clip_index",
    "clip_name",
    "start_index",
    "end_index",
    "start_resampled_index",
    "end_resampled_index",
    "object_name",
    "object_id",
]

In [9]:
# Build one row per annotated object: clip-level fields followed by the
# object's name, its index within the clip, and one value per object_parts key.
pre_df = []
for clip_idx, c_data in enumerate(clips_data):
    # A clip record is one header line followed by pairs of lines
    # (object name line, object attribute line).
    num_case = (len(c_data) - 1) // 2

    # extract clip information
    # The header splits on spaces into [id, name, '[start', "end]'..."],
    # e.g. ["'%1", 'name', '[1', "140]'\n"].
    clip_info = c_data[0].lstrip().split(" ")
    print(clip_info)
    clip_name = clip_info[1]
    start_idx = int(clip_info[2].strip("["))
    end_idx = int(clip_info[3].partition("]")[0])
    # Clips are paired with the resampled runs purely by position.
    start_resample_idx = begin_resampled_indices[clip_idx]
    end_resample_idx = end_resampled_indices[clip_idx]

    # get each case information
    for i in range(num_case):
        # The object name is the text between the first pair of single quotes.
        case_info = c_data[1+2*i].lstrip().split("'")
        case_name = case_info[1]

        case_data = c_data[2+2*i].lstrip()
        case_id = i
        obj_bool = []
        for op in object_parts:
            # Capture the whole number after "<key>=" so that multi-digit
            # values are not truncated to a single digit.
            m = re.search(op + r'=([0-9]+)', case_data)
            if m is None:
                raise ValueError(
                    "'{}=<number>' not found for object {!r} in clip {!r}: {!r}".format(
                        op, case_name, clip_name, case_data
                    )
                )
            # Kept as a string, exactly as it appears in the annotation text.
            obj_bool.append(m.group(1))
        pre_df.append([clip_idx+1, clip_name, start_idx, end_idx, start_resample_idx, end_resample_idx, case_name, case_id] + obj_bool)

["'%1", 'しょうこゆうぞう_opening', '[1', "140]'\n"]
["'%2", 'English_5_people', '[156', "311]'\n"]
["'%3", 'AI', '[327', "539]'\n"]
["'%4", '三丁目_芋', '[554', "740]'\n"]
["'%5", 'お兄さん体操_ending', '[773', "911]'\n"]
["'%6", '三丁目_汽車女学生3名', '[927', "1094]'\n"]
["'%7", 'ガシャンガシャンウィーン', '[1124', "1319]'\n"]
["'%8", '三丁目_高円寺', '[1335', "1651]'\n"]
["'%9", 'ぞうさんのあくび', '[1667', "1835]'\n"]
["'%10", '三丁目_3名東京だなー', '[1852', "1988]'\n"]
["'%11", 'パジャマでおじゃま', '[2004', "2168]'\n"]
["'%12", 'ざくろみたい', '[2183', "2326]'\n"]


In [10]:
# Assemble the collected rows into a table; the columns are the clip/object
# labels followed by one column per object part.
df = pd.DataFrame(data=pre_df, columns=[*labels, *object_parts])

In [11]:
# Show the assembled annotation table as this cell's output.
df

Unnamed: 0,clip_index,clip_name,start_index,end_index,start_resampled_index,end_resampled_index,object_name,object_id,subj,obj,eye,mouth,nose,ear,face,hand
0,1,しょうこゆうぞう_opening,1,140,0,233,Left しょうこ right eye,0,1,0,1,0,0,0,1,0
1,1,しょうこゆうぞう_opening,1,140,0,233,Left しょうこ left eye,1,1,0,2,0,0,0,1,0
2,1,しょうこゆうぞう_opening,1,140,0,233,Left しょうこ mouth,2,1,0,0,1,0,0,1,0
3,1,しょうこゆうぞう_opening,1,140,0,233,Left しょうこ right hand,3,1,0,0,0,0,0,0,1
4,1,しょうこゆうぞう_opening,1,140,0,233,Left ゆうぞう right eye,4,2,0,1,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
125,12,ざくろみたい,2183,2326,3641,3882,Girl1 left eye,5,2,0,2,0,0,0,1,0
126,12,ざくろみたい,2183,2326,3641,3882,Girl1 mouth,6,2,0,0,1,0,0,1,0
127,12,ざくろみたい,2183,2326,3641,3882,Girl2 right eye,7,1,0,1,0,0,0,1,0
128,12,ざくろみたい,2183,2326,3641,3882,Girl2 left eye,8,1,0,2,0,0,0,1,0


In [12]:
# Write the table to CSV as UTF-8, without the row index.
# NOTE(review): cp932 was noted as an alternative encoding here - presumably
# for tools that expect Shift-JIS; confirm before switching.
df.to_csv(
    "../preprocessed_data/eyeclip_annotation.csv",
    index=False,
    encoding='utf-8',
)