In [1]:
import numpy as np
import json
import os
import matplotlib.pyplot as plt
import time
import pickle

In [2]:
'''
Data organization

db = {
    'video_name': {
        'pedestrian_id': { # track_id-#
            'frames': [0, 1, 2, ...], # target pedestrian appeared frames
            'cv_annotations': {
                'track_id': track_id, 
                'bbox': [xtl, ytl, xbr, ybr], 
                'skeleton': {
                    'left_ankle': []
                    'right_knee': [],
                }
            },
            'nlp_annotations': {
                vid_uid_pair: {'intent': [], 'description': [], 'key_frame': []},
                ...
            }
        }
    }
}
'''

"\nData organization\n\ndb = {\n    'video_name': {\n        'pedestrian_id': { # track_id-#\n            'frames': [0, 1, 2, ...], # target pedestrian appeared frames\n            'cv_annotations': {\n                'track_id': track_id, \n                'bbox': [xtl, ytl, xbr, ybr], \n                'skeleton': {\n                    'left_ankle': []\n                    'right_knee': [],\n                }\n            },\n            'nlp_annotations': {\n                vid_uid_pair: {'intent': [], 'description': [], 'key_frame': []},\n                ...\n            }\n        }\n    }\n}\n"

In [3]:
dataroot = '/home/scott/Work/Toyota/datasets/PSI200/annotations/'
key_frame_folder = 'cognitive_annotation_key_frame'
extended_folder = 'cognitive_annotation_extended'
splits_folder = '/home/scott/Work/Toyota/datasets/baselines/splits/'

In [4]:
def add_ped_case(db, video_name, ped_name, nlp_vid_uid_pairs):
    if video_name not in db:
        db[video_name] = {}
    
    db[video_name][ped_name] = { # pedId is 'track_id' in cv-annotation
            'frames': None, # [] list of frame_idx of the target pedestrian appear
            'cv_annotations': {
                'track_id': ped_name,
                'bbox': [] # [] list of bboxes, each bbox is [xtl, ytl, xbr, ybr]
            },
            'nlp_annotations': { 
                # [vid_uid_pair: {'intent': [], 'description': [], 'key_frame': []}]
            }
        }
    for vid_uid in nlp_vid_uid_pairs:
            db[video_name][ped_name]['nlp_annotations'][vid_uid] = {
                'intent': [],
                'description': [],
                'key_frame': [] # 0: not key frame (expanded from key frames NLP annotations) 1: key frame (labeled by NLP annotations)
            }

In [5]:
with open(os.path.join(splits_folder, 'PSI200_split.json')) as f: 
# ['PSI100_split.json', 'PSI200_split.json', 'PSI200_split_paper.json']
    datasplits = json.load(f)

In [6]:
def init_db(video_list):
    db = {}
#     data_split = 'train' # 'train', 'val', 'test'
    for video_name in sorted(video_list):
        try:
            with open(os.path.join(dataroot, extended_folder, video_name, 'pedestrian_intent.json'), 'r') as f:
                # key_frame_folder
                annotation = json.load(f)
        except:
            print("Error loading pedestrian intent annotation")
            continue
    #     print(vname)
        db[video_name] = {}
        for ped in annotation['pedestrians'].keys():
    #         print(ped)
            pedId = ped
            observed_frames = annotation['pedestrians'][ped]['observed_frames']
            bbox_list = annotation['pedestrians'][ped]['cv_annotations']
            cog_annotation = annotation['pedestrians'][ped]['cognitive_annotations']
            nlp_vid_uid_pairs = cog_annotation.keys()
    #         for vid_uid in cog_annotation.keys():
            add_ped_case(db, video_name, pedId, nlp_vid_uid_pairs)
    return db

In [7]:
split_name = 'train' # 'train', 'val', 'test'
db = init_db(sorted(datasplits[split_name])) 

In [8]:
db.keys()

dict_keys(['video_0001', 'video_0002', 'video_0003', 'video_0004', 'video_0005', 'video_0006', 'video_0007', 'video_0008', 'video_0009', 'video_0010', 'video_0011', 'video_0012', 'video_0013', 'video_0014', 'video_0015', 'video_0016', 'video_0017', 'video_0018', 'video_0019', 'video_0020', 'video_0021', 'video_0022', 'video_0023', 'video_0024', 'video_0025', 'video_0026', 'video_0027', 'video_0028', 'video_0029', 'video_0030', 'video_0031', 'video_0032', 'video_0033', 'video_0034', 'video_0035', 'video_0036', 'video_0037', 'video_0038', 'video_0039', 'video_0040', 'video_0041', 'video_0042', 'video_0043', 'video_0044', 'video_0045', 'video_0046', 'video_0047', 'video_0048', 'video_0049', 'video_0050', 'video_0051', 'video_0052', 'video_0053', 'video_0054', 'video_0055', 'video_0056', 'video_0057', 'video_0058', 'video_0059', 'video_0060', 'video_0061', 'video_0062', 'video_0063', 'video_0065', 'video_0066', 'video_0067', 'video_0068', 'video_0069', 'video_0070', 'video_0071', 'video_00

## Add CV Annotations

In [9]:
# def interpolate_small_missing_frames(frame_list, bbox_list, threshold = 5):
#     # If missing frames less than 5, fill missing frames and bboxes by interpolation
#     min_fid, max_fid = frame_list[0], frame_list[-1]
#     missing = []
#     for i in range(min_fid, max_fid+1):
#         if i not in frame_list:
#             missing.append(i)
# #                             print("missing frame: ", i)
#     if len(missing) < threshold:
#         pass
#     pass
def split_frame_lists(frame_list, bbox_list, threshold = 60):
    frame_res = [] # store list of splits sliced by the missing frames
    bbox_res = []
    inds_res = []
    
    inds_split = [0]
    frame_split = [frame_list[0]] # frame list
    bbox_split = [bbox_list[0]] # bbox list
    for i in range(1, len(frame_list)):
        if frame_list[i] - frame_list[i-1] == 1:
            inds_split.append(i)
            frame_split.append(frame_list[i])
            bbox_split.append(bbox_list[i])
        else: # missing observed
            if len(frame_split) > threshold: # next position frame is missing
                inds_res.append(inds_split)
                frame_res.append(frame_split)
                bbox_res.append(bbox_split)
                inds_split = []
                frame_split = []
                bbox_split = []
            else: # ignore splits that are too short
                inds_split = []
                frame_split = []
                bbox_split = []
    # break loop when i reaches the end of list
    if len(frame_split) > threshold: # reach the end
        inds_res.append(inds_split)
        frame_res.append(frame_split)
        bbox_res.append(bbox_split)
        inds_split = []
        frame_split = []
        bbox_split = []
            
#     print(f"{len(frame_res)} splits: ", [len(s) for s in frame_res])
    return frame_res, bbox_res, inds_res

In [10]:
def get_intent_des(db, vname, pid, split_inds, cog_annt):
    # split_inds: is the list of indices of the intent_annotations for the current split of pid in vname
#     print(vname, pid, split_inds)
    for vid_uid in cog_annt.keys():
#         print(vid_uid)
        intent_list = cog_annt[vid_uid]['intent']
        description_list = cog_annt[vid_uid]['description']
        key_frame_list = cog_annt[vid_uid]['key_frame']
        
        nlp_vid_uid = vid_uid
        db[vname][pid]['nlp_annotations'][nlp_vid_uid]['intent'] = [intent_list[i] for i in split_inds]
        db[vname][pid]['nlp_annotations'][nlp_vid_uid]['description'] = [description_list[i] for i in split_inds]
        db[vname][pid]['nlp_annotations'][nlp_vid_uid]['key_frame'] = [key_frame_list[i] for i in split_inds]
#         print(nlp_vid_uid, len(intent_list), len(db[vname][pid]['nlp_annotations'][nlp_vid_uid]['intent']))
#         print(db[vname][pid]['nlp_annotations'][nlp_vid_uid]['intent'])
        

In [11]:
def update_db_annotations(db):
    video_list = sorted(db.keys())
    for video_name in video_list:
        ped_list = list(db[video_name].keys())
        tracks = list(db[video_name].keys())
        try:
            with open(os.path.join(dataroot, extended_folder, video_name, 'pedestrian_intent.json'), 'r') as f:
                # key_frame_folder
                annotation = json.load(f)
        except:
            print("Error loading pedestrian intent annotation")
            continue

        for pedId in ped_list:
    #         print(pedId, ped_list)
            observed_frames = annotation['pedestrians'][pedId]['observed_frames']
            observed_bboxes = annotation['pedestrians'][pedId]['cv_annotations']['bboxes']
            cog_annotation = annotation['pedestrians'][pedId]['cognitive_annotations']
            if len(observed_frames) == observed_frames[-1] - observed_frames[0] + 1:
                threshold=60
                if len(observed_frames) > 60:
                    cv_frame_list = observed_frames
                    cv_frame_box = observed_bboxes
                    db[video_name][pedId]['frames'] = cv_frame_list
                    db[video_name][pedId]['cv_annotations']['bbox'] = cv_frame_box
                    get_intent_des(db, video_name, pedId, [*range(len(observed_frames))], cog_annotation)
                else: # too few frames observed
                    print("Single ped occurs too short.", video_name, pedId, len(observed_frames))
                    del db[video_name][pedId]
            else:
                print("missing frames bbox noticed! ", video_name, pedId, len(observed_frames), ' frames observed from ', 
                      observed_frames[-1] - observed_frames[0] + 1)
                cv_frame_list, cv_frame_box, cv_split_inds = split_frame_lists(observed_frames, observed_bboxes, 
                                                                                 threshold=60)
                if len(cv_split_inds) == 0:
                    print(video_name, ' ', pedId, "After removing missing frames, not split left!")
                    del db[video_name][pedId]
#                     print(observed_frames)
                elif len(cv_split_inds) == 1:
                    db[video_name][pedId]['frames'] = cv_frame_list[0]
                    db[video_name][pedId]['cv_annotations']['bbox'] = cv_frame_box[0]
                    get_intent_des(db, video_name, pedId, cv_split_inds[0], cog_annotation)
                else:
                    # multiple splits left after removing missing box frames
                    print(f"{len(cv_frame_list)} splits: ", [len(s) for s in cv_frame_list])
                    print('-----------------')
                    nlp_vid_uid_pairs = db[video_name][pedId]['nlp_annotations'].keys()
                    for i in range(len(cv_frame_list)):
                        ped_splitId = pedId + '-' + str(i)
                        add_ped_case(db, video_name, ped_splitId, nlp_vid_uid_pairs)
                        db[video_name][ped_splitId]['frames'] = cv_frame_list[i]
                        db[video_name][ped_splitId]['cv_annotations']['bbox'] = cv_frame_box[i]
                        get_intent_des(db, video_name, ped_splitId, cv_split_inds[i], cog_annotation)
    #                     if video_name == 'video_0005' and ped_splitId == 'track_1-0':
    #                         print(video_name, pedId, ped_splitId, cv_split_inds[i])
    #                         raise Exception("ERROR!")
                        if len(db[video_name][ped_splitId]['nlp_annotations'][list(db[video_name][ped_splitId]['nlp_annotations'].keys())[0]]['intent']) == 0:
#                             print(video_name, ped_splitId, cv_split_inds[i])
#                             print(db[video_name][ped_splitId]['nlp_annotations'])
                            raise Exception("ERROR!")
                    del db[video_name][pedId]
            tracks.remove(pedId)
        if len(db[video_name].keys()) < 1:
            print("!!!!! Video ", video_name, " has no valid ped sequence! Remove this video!")
            del db[video_name]
        if len(tracks) > 0:
            print(f"{video_name} missing pedestrian {tracks} ")

In [12]:
update_db_annotations(db)

missing frames bbox noticed!  video_0005 track_1 170  frames observed from  217
2 splits:  [61, 70]
-----------------
missing frames bbox noticed!  video_0005 track_14 257  frames observed from  324
2 splits:  [104, 74]
-----------------
missing frames bbox noticed!  video_0005 track_55 289  frames observed from  308
missing frames bbox noticed!  video_0005 track_0 195  frames observed from  292
missing frames bbox noticed!  video_0005 track_15 168  frames observed from  326
video_0005   track_15 After removing missing frames, not split left!
missing frames bbox noticed!  video_0005 track_2 179  frames observed from  258
2 splits:  [65, 71]
-----------------
missing frames bbox noticed!  video_0005 track_3 235  frames observed from  257
2 splits:  [64, 153]
-----------------
missing frames bbox noticed!  video_0005 track_4 224  frames observed from  252
2 splits:  [87, 110]
-----------------
missing frames bbox noticed!  video_0005 track_5 94  frames observed from  157
video_0005   tra

In [19]:
db['video_0107'].keys()

dict_keys(['track_0', 'track_2', 'track_61'])

In [13]:
video_name = 'video_0005'
try:
    with open(os.path.join(dataroot, extended_folder, video_name, 'pedestrian_intent.json'), 'r') as f:
        # key_frame_folder
        annotation = json.load(f)
except:
    print("Error loading pedestrian intent annotation")

pedId = 'track_1'
observed_frames = annotation['pedestrians'][pedId]['observed_frames']
observed_bboxes = annotation['pedestrians'][pedId]['cv_annotations']['bboxes']
cog_annotation = annotation['pedestrians'][pedId]['cognitive_annotations']
for vid_uid in cog_annotation.keys():
    intent_list = cog_annotation[vid_uid]['intent']
    description_list = cog_annotation[vid_uid]['description']
    key_frame_list = cog_annotation[vid_uid]['key_frame']
    print(len(intent_list))

170
170
170
170
170
170
170
170
170
170


In [14]:
for v in sorted(db.keys()):
    print(v, ', ped num: ', len(db[v].keys()))
    for p in db[v].keys():
        print('     ', p, len(db[v][p]['frames']))
        if len(db[v][p]['frames']) < 60:
            print('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')

video_0001 , ped num:  1
      track_0 123
video_0002 , ped num:  1
      track_8 161
video_0003 , ped num:  3
      track_131 64
      track_143 189
      track_15 316
video_0004 , ped num:  1
      track_22 98
video_0005 , ped num:  17
      track_55 237
      track_71 287
      track_0 61
      track_6 65
      track_72 103
      track_1-0 61
      track_1-1 70
      track_14-0 104
      track_14-1 74
      track_2-0 65
      track_2-1 71
      track_3-0 64
      track_3-1 153
      track_4-0 87
      track_4-1 110
      track_7-0 76
      track_7-1 155
video_0006 , ped num:  1
      track_3 190
video_0007 , ped num:  2
      track_1 144
      track_2 145
video_0008 , ped num:  1
      track_0 194
video_0009 , ped num:  1
      track_3 89
video_0010 , ped num:  2
      track_4 114
      track_5 112
video_0011 , ped num:  1
      track_0 141
video_0012 , ped num:  2
      track_0 192
      track_1 109
video_0013 , ped num:  5
      track_5 149
      track_59-0 87
      track_59-1 266

In [15]:
# database_name = 'intent_database_' + split_name + '_' + time.strftime("%d%b%Y-%Hh%Mm%Ss") + '.pkl'
database_name = 'intent_database_' + split_name + '.pkl'
with open(os.path.join('../database', database_name), 'wb') as fid:
    pickle.dump(db, fid)

In [16]:
with open(os.path.join('../database', database_name), 'rb') as fid:
    imdb = pickle.load(fid)

In [17]:
imdb.keys()

NameError: name 'imdb' is not defined

In [None]:
annotation['pedestrians']['track_0']['cognitive_annotations']['nlp_vid_11_uid_4760'].keys()#['intent']

In [None]:
['video_name', 'pedestrians']['track_0']['observed_frames', 'cv_annotations', 'cognitive_annotations']
['nlp_vid_11_uid_4760', 'nlp_vid_12_uid_5226', 'nlp_vid_20_uid_5345', 'nlp_vid_23_uid_4796', 'nlp_vid_24_uid_4813',
 'nlp_vid_26_uid_4808', 'nlp_vid_26_uid_4814', 'nlp_vid_34_uid_4791', 'nlp_vid_37_uid_4761', 'nlp_vid_37_uid_4811',
 'nlp_vid_6_uid_5159']
['intent', 'description', 'key_frame']

In [None]:
annotation['pedestrians']['track_0']['cv_annotations']