"""
Author: Haoping Xu
adapted from https://github.com/charlesq34/frustum-pointnets
"""
import os
import os.path
import numpy as np
IMG_EXTENSIONS = [
'.jpg', '.JPG', '.jpeg', '.JPEG',
'.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP',
]
def is_image_file(filename):
return any(filename.endswith(extension) for extension in IMG_EXTENSIONS)


def dataloader(filepath, load):
    '''
    Data loader used for training the PSMNet CNN model.
    '''
    left_fold = 'image_2/'
    right_fold = 'image_3/'
    disp_L = 'disp_occ_0/'

    image = [img for img in os.listdir(filepath + left_fold)]
    if load is not None:
        # When 'load' is given (e.g. resuming from a saved model),
        # every frame goes to the validation split.
        train = []
        val = image
        disp_train_L = []
        disp_val_L = []
    else:
        # Otherwise split: the first 160 frames for training, the rest for validation.
        train = image[:160]
        val = image[160:]
        disp_train_L = [filepath + disp_L + img for img in train]
        disp_val_L = [filepath + disp_L + img for img in val]

    left_train = [filepath + left_fold + img for img in train]
    right_train = [filepath + right_fold + img for img in train]
    left_val = [filepath + left_fold + img for img in val]
    right_val = [filepath + right_fold + img for img in val]
    return left_train, right_train, disp_train_L, left_val, right_val, disp_val_L, train, val
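
# A minimal usage sketch; the dataset root below is a hypothetical example of a
# KITTI 2015 'training' directory containing image_2/, image_3/ and disp_occ_0/:
#
#   left_train, right_train, disp_train_L, left_val, right_val, disp_val_L, train, val = \
#       dataloader('/data/kitti2015/training/', load=None)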


class Object3d(object):
    ''' 3d object label '''

    def __init__(self, label_file_line):
        data = label_file_line.split(' ')
        data[1:] = [float(x) for x in data[1:]]

        # extract label, truncation, occlusion
        self.type = data[0]  # 'Car', 'Pedestrian', ...
        self.truncation = data[1]  # truncated pixel ratio [0..1]
        self.occlusion = int(data[2])  # 0=visible, 1=partly occluded, 2=fully occluded, 3=unknown
        self.alpha = data[3]  # object observation angle [-pi..pi]

        # extract 2d bounding box in 0-based coordinates
        self.xmin = data[4]  # left
        self.ymin = data[5]  # top
        self.xmax = data[6]  # right
        self.ymax = data[7]  # bottom
        self.box2d = np.array([self.xmin, self.ymin, self.xmax, self.ymax])

        # extract 3d bounding box information
        self.h = data[8]  # box height
        self.w = data[9]  # box width
        self.l = data[10]  # box length (in meters)
        self.t = np.array([data[11], data[12], data[13]])  # location (x,y,z) in camera coord.
        self.ry = data[14]  # yaw angle (around Y-axis in camera coordinates) [-pi..pi]

    def print_object(self):
        # truncation is a float ratio, so print it with %f rather than %d
        print('Type, truncation, occlusion, alpha: %s, %f, %d, %f' %
              (self.type, self.truncation, self.occlusion, self.alpha))
        print('2d bbox (x0,y0,x1,y1): %f, %f, %f, %f' %
              (self.xmin, self.ymin, self.xmax, self.ymax))
        print('3d bbox h,w,l: %f, %f, %f' %
              (self.h, self.w, self.l))
        print('3d bbox location, ry: (%f, %f, %f), %f' %
              (self.t[0], self.t[1], self.t[2], self.ry))


def read_label(label_filename):
    lines = [line.rstrip() for line in open(label_filename)]
    objects = [Object3d(line) for line in lines]
    return objects
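
# For reference, a KITTI label line as consumed by read_label/Object3d looks like
# (the numbers here are illustrative, not taken from a real frame):
#
#   Car 0.00 0 -1.58 587.01 173.33 614.12 200.12 1.65 1.67 3.64 -0.65 1.71 46.70 -1.59
#
# i.e. type, truncation, occlusion, alpha, 2D box (xmin ymin xmax ymax),
# 3D size (h w l), location (x y z) in camera coordinates, and yaw ry.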


def read_2d_box(box_file):
    objects = []
    # Template label line with all-zero fields; only the 2D box is filled in below.
    empty_list = "Car 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0"
    for line in open(box_file):
        if not line.strip():
            continue
        data = line.split()
        new_obj = Object3d(empty_list)
        new_obj.type = 'Car'
        new_obj.xmin = float(data[0])
        new_obj.ymin = float(data[1])
        new_obj.xmax = float(data[2])
        new_obj.ymax = float(data[3])
        new_obj.box2d = np.array([float(i) for i in data])
        objects.append(new_obj)
    return objects
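
# Judging from the parsing above, each non-empty line of box_file is expected to
# hold four whitespace-separated numbers, 'xmin ymin xmax ymax', e.g.
# (illustrative): 100.0 150.0 300.0 280.0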


def read_calib_file(filepath):
    ''' Read in a calibration file and parse into a dictionary.
    Ref: https://github.com/utiasSTARS/pykitti/blob/master/pykitti/utils.py
    '''
    data = {}
    with open(filepath, 'r') as f:
        for line in f.readlines():
            line = line.rstrip()
            if len(line) == 0:
                continue
            key, value = line.split(':', 1)
            # The only non-float values in these files are dates, which
            # we don't care about anyway
            try:
                data[key] = np.array([float(x) for x in value.split()])
            except ValueError:
                pass
    return data
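
# KITTI calibration files are plain 'key: values' text lines; a truncated,
# illustrative excerpt:
#
#   P2: 7.215377e+02 0.000000e+00 6.095593e+02 ...
#   R0_rect: 9.999239e-01 9.837760e-03 -7.445048e-03 ...
#   Tr_velo_to_cam: 7.533745e-03 -9.999714e-01 ...
#
# read_calib_file returns each value flattened to a 1-D float array, e.g.
# data['P2'].shape == (12,) before the np.reshape in Calib below.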


class Calib(object):
    def __init__(self, calib_filepath):
        calibs = read_calib_file(calib_filepath)
        # Projection matrix from rect camera coord to image2 coord
        self.P = calibs['P2']
        self.P = np.reshape(self.P, [3, 4])
        # Rigid transform from Velodyne coord to reference camera coord
        self.V2C = calibs['Tr_velo_to_cam']
        self.V2C = np.reshape(self.V2C, [3, 4])
        # Rotation from reference camera coord to rect camera coord
        self.R0 = calibs['R0_rect']
        self.R0 = np.reshape(self.R0, [3, 3])
        # Camera intrinsics and extrinsics
        self.c_u = self.P[0, 2]
        self.c_v = self.P[1, 2]
        self.f_u = self.P[0, 0]
        self.f_v = self.P[1, 1]
        self.b_x = self.P[0, 3] / (-self.f_u)  # baseline relative to reference camera
        self.b_y = self.P[1, 3] / (-self.f_v)

    def cart2hom(self, pts_3d):
        ''' Input: nx3 points in Cartesian coordinates
        Output: nx4 points in homogeneous coordinates, by appending a column of 1s
        '''
        n = pts_3d.shape[0]
        pts_3d_hom = np.hstack((pts_3d, np.ones((n, 1))))
        return pts_3d_hom

    def project_velo_to_ref(self, pts_3d_velo):
        pts_3d_velo = self.cart2hom(pts_3d_velo)  # nx4
        return np.dot(pts_3d_velo, np.transpose(self.V2C))

    def project_rect_to_ref(self, pts_3d_rect):
        ''' Input and Output are nx3 points '''
        return np.transpose(np.dot(np.linalg.inv(self.R0), np.transpose(pts_3d_rect)))

    def project_ref_to_rect(self, pts_3d_ref):
        ''' Input and Output are nx3 points '''
        return np.transpose(np.dot(self.R0, np.transpose(pts_3d_ref)))

    def project_rect_to_image(self, pts_3d_rect):
        ''' Input: nx3 points in rect camera coord.
        Output: nx2 points in image2 coord.
        '''
        pts_3d_rect = self.cart2hom(pts_3d_rect)
        pts_2d = np.dot(pts_3d_rect, np.transpose(self.P))  # nx3
        pts_2d[:, 0] /= pts_2d[:, 2]
        pts_2d[:, 1] /= pts_2d[:, 2]
        return pts_2d[:, 0:2]

    def project_velo_to_rect(self, pts_3d_velo):
        pts_3d_ref = self.project_velo_to_ref(pts_3d_velo)
        return self.project_ref_to_rect(pts_3d_ref)

    def project_velo_to_image(self, pts_3d_velo):
        ''' Input: nx3 points in velodyne coord.
        Output: nx2 points in image2 coord.
        '''
        pts_3d_rect = self.project_velo_to_rect(pts_3d_velo)
        return self.project_rect_to_image(pts_3d_rect)

    def project_image_to_rect(self, uv_depth):
        ''' Input: nx3 first two channels are uv, 3rd channel
        is depth in rect camera coord.
        Output: nx3 points in rect camera coord.
        '''
        n = uv_depth.shape[0]
        x = ((uv_depth[:, 0] - self.c_u) * uv_depth[:, 2]) / self.f_u + self.b_x
        y = ((uv_depth[:, 1] - self.c_v) * uv_depth[:, 2]) / self.f_v + self.b_y
        pts_3d_rect = np.zeros((n, 3))
        pts_3d_rect[:, 0] = x
        pts_3d_rect[:, 1] = y
        pts_3d_rect[:, 2] = uv_depth[:, 2]
        return pts_3d_rect
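
# A minimal round-trip sketch, assuming a KITTI calibration file at the
# hypothetical path 'calib/000000.txt':
#
#   calib = Calib('calib/000000.txt')
#   uv_depth = np.array([[500.0, 200.0, 20.0]])       # (u, v, depth) in image2
#   pts_rect = calib.project_image_to_rect(uv_depth)  # nx3 rect-camera points
#   uv = calib.project_rect_to_image(pts_rect)        # recovers ~(500, 200)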