In [1]:
import glob
import json
import os

import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
plt.rcParams['image.interpolation'] = 'nearest'

# For camera projection (with distortion)
import panutils

In [2]:
# Path configuration.
# `data_path` prefixes the image/depth/confidence directories built below; it is empty
# here, so those directories start at the sequence name
# (a previous value was '/home/wanyue/Desktop/panoptic-toolbox/').
data_path = ''
# `data_path_real` prefixes the bbox directory built below.
data_path_real = '../'
seq_name = '171204_pose3'

kinect_img_path = '{}{}/kinoptic_rgb/'.format(data_path, seq_name)
depth_img_path = '{}{}/kinoptic_depth/'.format(data_path, seq_name)
confidence_img_path = '{}{}/kinoptic_confidence/'.format(data_path, seq_name)
bbox_path = '{}{}/bbox/'.format(data_path_real, seq_name)
# Ten colors sampled at evenly spaced positions of matplotlib's `hsv` colormap,
# converted to plain nested lists.
hsv_positions = np.linspace(0, 1, 10)
colors = plt.cm.hsv(hsv_positions).tolist()

## Load camera calibration parameters

In [3]:
# Read the Kinect calibration JSON of this sequence into `calib`.
calib_file = '{0}{1}/kcalibration_{1}.json'.format(data_path_real, seq_name)
with open(calib_file) as cfile:
    calib = json.load(cfile)

# Key every sensor by (panel, node), e.g. (50, 1).
# Kinect cameras use panel 50; nodes are numbered from 1 (to be consistent with the
# MATLAB format) in the order the sensors appear in calib['sensors'].
cameras = {}
for node, sensor in enumerate(calib['sensors'], start=1):
    cameras[(50, node)] = sensor

# Attach the RGB-camera parameters of each Kinect as numpy objects, alongside the raw
# JSON fields already stored in the sensor dict.
for cam in cameras.values():
    color_extrinsics = np.matrix(cam['M_color'])
    cam['K'] = np.matrix(cam['K_color'])
    cam['distCoef'] = np.array(cam['distCoeffs_color'])
    cam['R'] = np.matrix(color_extrinsics[0:3, 0:3])      # upper-left 3x3 block of M_color
    cam['t'] = color_extrinsics[0:3, 3].reshape((3, 1))   # first three rows of column 3

In [4]:
# JSON-serializable copy of the camera parameters, keyed by camera name ('50_01' ... '50_10').
cameras_for_json = {}

for cam_id in range(1, 11):
    cam_params = cameras[(50, cam_id)]
    cam_key = '{0:02d}_{1:02d}'.format(50, cam_id)
    cameras_for_json[cam_key] = {
        # K and R are stored as flat, row-major lists of 9 values.
        'K': np.asarray(cam_params['K']).reshape(9).tolist(),
        'distCoef': cam_params['distCoef'].tolist(),
        'R': np.asarray(cam_params['R']).reshape(9).tolist(),
        # NOTE(review): unlike K and R, 't' is stored as a nested 1x3 list ([[tx, ty, tz]]).
        # This matches what has been written so far; confirm what readers of the JSON
        # expect before flattening it.
        't': [np.asarray(cam_params['t']).reshape(3).tolist()],
    }

In [16]:
## Get the names of the image files.
# The RGB folder of camera 50_06 is the one listed to count frames.
# (`glob` is imported in the notebook's top import cell.)
image_folder_name = data_path_real+seq_name+'/kinoptic_rgb/50_06/'
image_names = sorted(glob.glob(image_folder_name + '*.jpg'))
num_images = len(image_names)

In [18]:
# Do not count the last 100 images, since not all cameras have the same number of frames.
# Computed from `image_names` rather than from `num_images` itself, so re-running this
# cell does not subtract another 100 each time.
num_images = len(image_names) - 100

## Sampling frames and creating annotation and image objects

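### Image objects contain
 'id': running index of the sample (shared with the matching annotation object)  
 'cam': camera name, e.g. `50_06`  
 'image_name': path of the RGB image  
 'depth_name': path of the depth image  
 'confidence_name': path of the confidence image  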
 
 
### Annotation objects contain
'id': running index of the sample (same as the matching image object)  
'bbox': bounding box loaded from the per-image `.npy` file (obtained from Mask R-CNN)



In [20]:
# Example of the per-frame file naming scheme: <panel>_<node>/<panel>_<node>_<frame>.npy.
# Literal values (panel 50, node 8, frame 234) are used so this cell does not depend on
# loop variables that are only defined further down the notebook.
example_bbox_name = '{0:02d}_{1:02d}/{0:02d}_{1:02d}_{2:08d}.npy'.format(50, 8, 234)
example_bbox_name

'50_08/50_08_00000234.npy'

In [21]:
# Build one image record and one annotation record for every (frame, camera) pair that
# has a bbox file. Both records of a pair carry the same running 'id'.
images = []
annotations = []
sample_rate = 1    # step between sampled frame offsets
start_index = 200  # frame number that offset 0 maps to
selected_cameras = [1,2,3,4,5,6,7,8,9,10] # all 10 kinect cameras are selected
cnt = 0            # running id, incremented only when a pair is actually recorded
for idx in range(0, num_images, sample_rate):
    img_idx = start_index + idx
    for cam_id in selected_cameras:
        cam_name = '{0:02d}_{1:02d}'.format(50, cam_id)
        # '<cam>/<cam>_<frame>' stem shared by the bbox, RGB, depth and confidence files
        frame_stem = '{0}/{0}_{1:08d}'.format(cam_name, img_idx)

        # get bbox first since it may not be defined for all frames (person object is absent)
        bbox_full_path = bbox_path + frame_stem + '.npy'
        if not os.path.exists(bbox_full_path):
            continue
        bbox_array = np.load(bbox_full_path)
        annotations.append( { 'id' : cnt,
                              'bbox' : bbox_array.tolist(), # make np array json serializable
                            })

        images.append( { 'id': cnt,
                         'cam': cam_name,
                         'image_name': kinect_img_path + frame_stem + '.jpg',
                         'depth_name': depth_img_path + frame_stem + '.jpg',
                         'confidence_name': confidence_img_path + frame_stem + '.jpg'
                       })

        cnt = cnt + 1

In [22]:
# Bundle the camera parameters, annotations and image records, then write them to disk
# as a single JSON document.
data = dict(
    cameras=cameras_for_json,
    annotations=annotations,
    images=images,
)

with open('cmu_depth_training.json', 'w') as json_file:
    json.dump(data, json_file)

In [28]:
# Number of image records collected (one per frame/camera pair that had a bbox file).
len(data['images'])

36873

In [26]:
# Number of top-level entries in `data` ('cameras', 'annotations', 'images').
len(data)

3