"""
Post-process data in Spectacular AI format and convert it to input
for NeRF or Gaussian Splatting methods, or export optimized pointclouds in ply and pcd formats.
"""
import json
import os
import math
from collections import OrderedDict
# --- The following mechanism allows using this both as a stand-alone
# script and as a subcommand in sai-cli.
def define_args(parser):
parser.add_argument("input", help="Path to folder with session to process")
parser.add_argument("output", help="Output folder, or filename with [.ply, .pcd] or [.obj] extension for exporting pointcloud or mesh")
parser.add_argument('--format', choices=['taichi', 'nerfstudio'], default='nerfstudio', help='Output format.')
parser.add_argument("--cell_size", help="Dense point cloud decimation cell size (meters)", type=float, default=0.1)
parser.add_argument("--distance_quantile", help="Max point distance filter quantile (0 = disabled)", type=float, default=0.99)
parser.add_argument("--key_frame_distance", help="Minimum distance between keyframes (meters)", type=float, default=0.05)
parser.add_argument('--no_icp', action='store_true')
parser.add_argument('--device_preset', choices=['none', 'oak-d', 'k4a', 'realsense', 'android', 'android-tof', 'ios-tof', 'orbbec-astra2', 'orbbec-femto'], help="Automatically detected in most cases")
parser.add_argument('--fast', action='store_true', help='Fast but lower quality settings')
parser.add_argument('--mono', action='store_true', help='Monocular mode: disable ToF and stereo data')
parser.add_argument('--internal', action='append', type=str, help='Internal override parameters in the form --internal=name:value')
parser.add_argument('--blur_filter_range', type=int, default=4, help='Remove key frames that are the blurriest in a neighborhood of this size (0=disabled)')
parser.add_argument('--no_undistort', action='store_true', help='Do not undistort output images (only supported with certain devices)')
parser.add_argument('--image_format', type=str, default='jpg', help="Color image format (use 'png' for top quality)")
parser.add_argument("--texturize", help="Add textures to mesh export (BETA)", action="store_true")
parser.add_argument("--preview", help="Show latest primary image as a preview", action="store_true")
parser.add_argument("--preview3d", help="Show 3D visualization", action="store_true")
return parser
def define_subparser(subparsers):
    sub = subparsers.add_parser('process', help=__doc__.strip())
    sub.set_defaults(func=process)
    return define_args(sub)
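
# Interpolate the non-xyz columns of df_source (here: RGB colors) onto the xyz
# points of df_query by averaging the k nearest neighbors in 3D.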
def interpolate_missing_properties(df_source, df_query, k_nearest=3):
    import pandas as pd
    from scipy.spatial import KDTree

    xyz = list('xyz')
    print('generating a simplified point cloud (this may take a while...)')
    tree = KDTree(df_source[xyz].values)
    _, ii = tree.query(df_query[xyz], k=k_nearest)
    n = df_query.shape[0]

    df_result = pd.DataFrame(0, index=range(n), columns=df_source.columns)
    df_result[xyz] = df_query[xyz]
    other_cols = [c for c in df_source.columns if c not in xyz]
    for i in range(n):
        m = df_source.loc[ii[i].tolist(), other_cols].mean(axis=0)
        df_result.loc[i, other_cols] = m
    return df_result
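
# Drop the points of df_source that lie within the given radius of any point in df_exclude.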
def exclude_points(df_source, df_exclude, radius):
    from scipy.spatial import KDTree
    xyz = list('xyz')
    tree = KDTree(df_exclude[xyz].values)
    ii = tree.query_ball_point(df_source[xyz], r=radius, return_length=True)
    mask = [l == 0 for l in ii]
    df_result = df_source.iloc[mask]
    return df_result
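
# Decimate a point cloud by keeping (at most) the first point in each cubic
# cell of the given size.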
def voxel_decimate(df, cell_size):
    def grouping_function(row):
        return tuple([round(row[c] / cell_size) for c in 'xyz'])
    grouped = df.assign(voxel_index=df.apply(grouping_function, axis=1)).groupby('voxel_index')
    return grouped.first().reset_index()[[c for c in df.columns if c != 'voxel_index']]
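
# Rotate the device's world-frame linear and angular velocities into camera coordinates.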
def compute_cam_velocities(targetFrame, angularVelocity):
    # Image and pose data
    WToC = targetFrame.cameraPose.getWorldToCameraMatrix()
    vW = targetFrame.cameraPose.velocity
    vCam = WToC[:3, :3] @ [vW.x, vW.y, vW.z]
    vAngCam = WToC[:3, :3] @ [angularVelocity.x, angularVelocity.y, angularVelocity.z]
    return vCam, vAngCam
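
# Heuristic motion blur score: the mean pixel-space speed of the frame's sparse
# features, scaled by the exposure time when it is known. Higher means blurrier;
# frames with no features get a very large score.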
def blurScore(WToC, vCam, vAngCam, targetFrame, exposureTime):
    import numpy as np
    sumVels = 0
    n = 0
    for mpObs in targetFrame.sparseFeatures:
        pW = mpObs.position
        pCam = (WToC @ [pW.x, pW.y, pW.z, 1])[:3]
        pointVelCam = vCam + np.cross(vAngCam, pCam)
        vPix = targetFrame.cameraPose.camera.getIntrinsicMatrix()[:2,:2] @ (pointVelCam[:2] / np.maximum(pCam[2], 1e-6))
        n += 1
        sumVels += np.linalg.norm(vPix)
    if exposureTime > 0:
        sumVels *= exposureTime
    # print('blur score %g (n = %d)' % (float(sumVels) / max(n, 1), n))
    if n == 0: return 1e6
    return sumVels / n
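
# Write the point cloud as an ASCII PLY file with per-vertex position and 8-bit RGB color.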
def point_cloud_data_frame_to_ply(df, out_fn):
    with open(out_fn, 'wt') as f:
        f.write('\n'.join([
            'ply',
            'format ascii 1.0',
            'element vertex %d' % len(df),
            'property float x',
            'property float y',
            'property float z',
            'property uint8 red',
            'property uint8 green',
            'property uint8 blue',
            'end_header'
        ]) + '\n')
        for _, row in df.iterrows():
            r = []
            for prop in 'xyz': r.append(row[prop])
            for prop in 'rgb': r.append(int(row[prop]))
            f.write(' '.join([str(v) for v in r]) + '\n')
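
# Convert Spectacular AI calibration distortion parameters to the corresponding
# COLMAP-style camera model (OPENCV or OPENCV_FISHEYE) used by Nerfstudio.
# Returns None if the camera has no distortion.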
def convert_distortion(cam):
    coeffs = cam.get('distortionCoefficients', None)
    if coeffs is None:
        return None
    if all([c == 0.0 for c in coeffs]): return None

    get_coeffs = lambda names: dict(zip(names.split(), coeffs))
    model = 'OPENCV'
    if cam['model'] == 'brown-conrady':
        r = get_coeffs('k1 k2 p1 p2 k3 k4 k5 k6')
    elif cam['model'] == 'pinhole':
        r = get_coeffs('k1 k2 k3')
        r['p1'] = 0
        r['p2'] = 0
    elif cam['model'] == 'kannala-brandt4':
        model = 'OPENCV_FISHEYE'
        r = get_coeffs('k1 k2 k3 k4')
    else:
        raise RuntimeError(f"unsupported camera model: {cam['model']}")

    r['model'] = model
    r['cx'] = cam['principalPointX']
    r['cy'] = cam['principalPointY']
    r['fx'] = cam['focalLengthX']
    r['fy'] = cam['focalLengthY']
    return r
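
# The input poses use the OpenCV camera convention (x right, y down, z forward),
# whereas Nerfstudio expects the OpenGL convention (y up, z backward); the
# diagonal matrix below flips the y and z axes to convert between the two.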
def convert_json_taichi_to_nerfstudio(d):
    import numpy as np
    CAM_CONVENTION_CHANGE = np.array([
        [1, 0, 0, 0],
        [0,-1, 0, 0],
        [0, 0,-1, 0],
        [0, 0, 0, 1]
    ])
    INV_CAM_CONVENTION_CHANGE = CAM_CONVENTION_CHANGE # works for this particular matrix

    def transform_matrix_cam_to_world(c):
        return (np.array(c) @ CAM_CONVENTION_CHANGE).tolist()

    def transform_camera_dir_vec(c):
        return (INV_CAM_CONVENTION_CHANGE[:3, :3] @ c).tolist()

    by_camera = {}
    for c in d:
        k = c['camera_intrinsics']
        params = {
            "fl_x": k[0][0],
            "fl_y": k[1][1],
            "k1": 0,
            "k2": 0,
            "p1": 0,
            "p2": 0,
            "cx": k[0][2],
            "cy": k[1][2],
            "w": c['camera_width'],
            "h": c['camera_height'],
            "aabb_scale": 16,
            "frames": [],
            "orientation_override": "none", # stops Nerfstudio from breaking our "up" direction
            "auto_scale_poses_override": False,
            "ply_file_path": "./sparse_pc.ply"
        }
        distortion = c.get('camera_distortion', None)
        if distortion is not None:
            for k, v in distortion.items():
                params[k] = v
        for prop in ['rolling_shutter_time', 'exposure_time']:
            if c[prop] is not None and c[prop] != 0:
                params[prop] = c[prop]
        cam_id = json.dumps(params, sort_keys=True)
        if cam_id not in by_camera:
            by_camera[cam_id] = params
        converted = {
            'file_path': os.path.join("./images", c['image_path'].split('/')[-1]),
            "transform_matrix": transform_matrix_cam_to_world(c['T_pointcloud_camera']),
            "camera_linear_velocity": transform_camera_dir_vec(c['camera_linear_velocity']),
            "camera_angular_velocity": transform_camera_dir_vec(c['camera_angular_velocity']),
            "motion_blur_score": c["motion_blur_score"]
        }
        if 'depth_image_path' in c:
            converted['depth_file_path'] = os.path.join("./images", c['depth_image_path'].split('/')[-1])
        by_camera[cam_id]['frames'].append(converted)

    if len(by_camera) != 1:
        raise RuntimeError("unexpected number of cameras")
    key, value = list(by_camera.items())[0]
    return value
# TODO: don't use "Taichi" as the intermediate format
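# Convert poses and the merged point cloud to the COLMAP text format
# (images.txt, cameras.txt, points3D.txt) that Nerfstudio's COLMAP loader reads.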
def convert_json_taichi_to_colmap(pose_data, points_df, sparse_observations, nerfstudio_fake_obs=True):
    from scipy.spatial.transform import Rotation as R
    import numpy as np

    images = []
    cameras = []
    camera_id = 0
    max_pt_id = 0
    for image_id, c in enumerate(pose_data):
        k = c['camera_intrinsics']
        mat = np.linalg.inv(np.array(c['T_pointcloud_camera']))
        qx,qy,qz,qw = R.from_matrix(mat[:3,:3]).as_quat()
        q = [qw, qx, qy, qz]
        p = list(mat[:3, 3])
        images.append([image_id] + list(q) + list(p) + [camera_id, os.path.split(c['image_path'])[-1]])

        points = []
        for pt in sparse_observations.get(image_id, {}):
            max_pt_id = max(max_pt_id, pt.id)
            points.extend([pt.pixelCoordinates.x, pt.pixelCoordinates.y, pt.id])
        if nerfstudio_fake_obs and len(points) == 0:
            points = [100,100,0,200,200,1] # NeRFstudio loader will crash without this
        images.append(points)

        # TODO: variable intrinsics
        if len(cameras) == 0:
            cameras = [[
                camera_id,
                'PINHOLE',
                c['camera_width'],
                c['camera_height'],
                k[0][0],
                k[1][1],
                k[0][2],
                k[1][2]
            ]]

    points = []
    for _, row in points_df.iterrows():
        if 'id' in row:
            point_id = row['id']
        else:
            point_id = 0
        if point_id == 0:
            point_id = max_pt_id + 1
            max_pt_id += 1
        point = [
            int(point_id),
            row['x'],
            row['y'],
            row['z'],
            round(row['r']),
            round(row['g']),
            round(row['b'])
        ]
        # TODO: compute reprojection errors here if really necessary for some use case
        if nerfstudio_fake_obs:
            fake_err = 1
            img_id, point_id = 0, 0
            point.extend([fake_err, img_id, point_id])
        points.append(point)

    return points, images, cameras
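
# Main entry point: replays a recorded session through the Spectacular AI
# offline mapping pipeline and writes the selected output format.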
def process(args):
    import spectacularAI
    import cv2
    import shutil
    import tempfile
    import numpy as np
    import pandas as pd

    PC_AND_MESH_FORMATS = ['ply', 'pcd', 'obj']
    # Overwrite format if the output is a point cloud or mesh filename
    for fmt in PC_AND_MESH_FORMATS:
        if args.output.endswith('.' + fmt):
            args.format = fmt
            break

    useMono = None

    # State shared with the SDK callbacks (via nonlocal)
    savedKeyFrames = {}
    pointClouds = {}
    sparsePointColors = {}
    blurScores = {}
    frameWidth = -1
    frameHeight = -1
    intrinsics = None
    visualizer = None
    isTracking = False
    finalMapWritten = False

    exposureTime = 0
    rollingShutterTime = 0
    cameraDistortion = None
    def post_process_point_clouds(globalPointCloud, sparse_point_cloud_df):
        # Merge, colorize, decimate and distance-filter the point clouds
        if len(globalPointCloud) == 0:
            merged_df = sparse_point_cloud_df
        else:
            point_cloud_df = pd.DataFrame(np.array(globalPointCloud), columns=list('xyzrgb'))
            # drop uncolored points
            colored_point_cloud_df = point_cloud_df.loc[point_cloud_df[list('rgb')].max(axis=1) > 0].reset_index()
            colored_point_cloud_df['id'] = 0 # ID = 0 is not used for valid sparse map points
            filtered_point_cloud_df = exclude_points(colored_point_cloud_df, sparse_point_cloud_df, radius=args.cell_size)
            decimated_df = voxel_decimate(filtered_point_cloud_df, args.cell_size)

            # the dense point clouds presumably have more stable colors at corner points,
            # so prefer them for coloring the sparse points when available
            sparse_colored_point_cloud_df = interpolate_missing_properties(colored_point_cloud_df, sparse_point_cloud_df[list('xyz')])
            merged_df = pd.concat([sparse_colored_point_cloud_df, decimated_df])

        if args.distance_quantile > 0:
            dist2 = (merged_df[list('xyz')]**2).sum(axis=1).values
            MARGIN = 1.5
            max_dist2 = np.quantile(dist2, args.distance_quantile) * MARGIN**2
            print(f'filtering out points further than {np.sqrt(max_dist2)}m')
            merged_df = merged_df.iloc[dist2 < max_dist2]

        return merged_df
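
    # Mapping API callback: saves key frame images while the replay is running
    # and writes the final outputs once the optimized map is available.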
    def process_mapping_output(output):
        nonlocal savedKeyFrames
        nonlocal pointClouds
        nonlocal sparsePointColors
        nonlocal blurScores
        nonlocal frameWidth
        nonlocal frameHeight
        nonlocal intrinsics
        nonlocal visualizer
        nonlocal useMono
        nonlocal finalMapWritten

        if visualizer is not None:
            visualizer.onMappingOutput(output)

        saveImages = True
        if args.format in PC_AND_MESH_FORMATS:
            saveImages = False
            if output.finalMap: finalMapWritten = True
            return

        if not output.finalMap:
            # New frames, let's save the images to disk
            for frameId in output.updatedKeyFrames:
                keyFrame = output.map.keyFrames.get(frameId)
                if not keyFrame or savedKeyFrames.get(frameId):
                    continue
                savedKeyFrames[frameId] = True
                frameSet = keyFrame.frameSet
                targetFrame = frameSet.rgbFrame
                if not targetFrame: targetFrame = frameSet.primaryFrame
                if not targetFrame or not targetFrame.image: continue

                if keyFrame.pointCloud:
                    pointClouds[frameId] = (
                        np.copy(keyFrame.pointCloud.getPositionData()),
                        np.copy(keyFrame.pointCloud.getRGB24Data()))

                if frameWidth < 0:
                    frameWidth = targetFrame.image.getWidth()
                    frameHeight = targetFrame.image.getHeight()

                frameSet = keyFrame.frameSet
                if args.no_undistort:
                    undistortedFrame = targetFrame
                else:
                    undistortedFrame = frameSet.getUndistortedFrame(targetFrame)
                if intrinsics is None: intrinsics = undistortedFrame.cameraPose.camera.getIntrinsicMatrix()

                img = undistortedFrame.image.toArray()
                bgrImage = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
                if saveImages:
                    fileName = f"{tmp_dir}/frame_{frameId:05}.{args.image_format}"
                    cv2.imwrite(fileName, bgrImage)

                # Find colors for sparse features
                SHOW_FEATURE_MARKERS = True
                SHOW_MOTION_BLUR = False
                WToC = targetFrame.cameraPose.getWorldToCameraMatrix()
                vCam, vAngCam = compute_cam_velocities(targetFrame, keyFrame.angularVelocity)
                blurScores[frameId] = blurScore(WToC, vCam, vAngCam, undistortedFrame, exposureTime)

                for mpObs in undistortedFrame.sparseFeatures:
                    pPix = [mpObs.pixelCoordinates.x, mpObs.pixelCoordinates.y]
                    px = np.clip(round(pPix[0]), 0, img.shape[1]-1)
                    py = np.clip(round(pPix[1]), 0, img.shape[0]-1)
                    if mpObs.id not in sparsePointColors:
                        rgb = list(img[py, px, ...].view(np.uint8))
                        sparsePointColors[mpObs.id] = rgb
                        markerColor = (0, 255, 0)
                    else:
                        markerColor = (0, 128, 0)

                    if args.preview:
                        if SHOW_FEATURE_MARKERS:
                            cv2.circle(bgrImage, (px, py), 5, markerColor, thickness=1)
                        if SHOW_MOTION_BLUR:
                            BLUR_COLOR = (128, 255, 0)
                            VISU_SCALE = 5
                            pW = mpObs.position
                            pCam = (WToC @ [pW.x, pW.y, pW.z, 1])[:3]
                            pointVelCam = vCam + np.cross(vAngCam, pCam)
                            vPix = undistortedFrame.cameraPose.camera.getIntrinsicMatrix()[:2,:2] @ (pointVelCam[:2] / np.maximum(pCam[2], 1e-6))
                            dt = float(VISU_SCALE) / 30 # visualization only
                            vPix *= dt
                            blurBegin = [int(c) for c in pPix - vPix*dt/2]
                            blurEnd = [int(c) for c in pPix + vPix*dt/2]
                            cv2.line(bgrImage, (blurBegin[0], blurBegin[1]), (blurEnd[0], blurEnd[1]), BLUR_COLOR, thickness=1)

                # Legacy: support SDK versions which also produced images where frameSet.depthFrame.image was None
                if frameSet.depthFrame is not None and frameSet.depthFrame.image is not None and not useMono:
                    alignedDepth = frameSet.getAlignedDepthFrame(undistortedFrame)
                    depthData = alignedDepth.image.toArray()
                    if saveImages:
                        depthFrameName = f"{tmp_dir}/depth_{frameId:05}.png"
                        cv2.imwrite(depthFrameName, depthData)

                    DEPTH_PREVIEW = False
                    if args.preview and DEPTH_PREVIEW:
                        DEPTH_COLOR_MAP_MIDPOINT_M = 2.0
                        visuDepth = np.log1p(depthData * alignedDepth.depthScale) / np.log1p(DEPTH_COLOR_MAP_MIDPOINT_M) * 0.5 * 256
                        cv2.imshow("Depth frame", cv2.applyColorMap(np.clip(visuDepth, 0, 255).astype(np.uint8), cv2.COLORMAP_JET))

                # TODO: move these visualizations to the main thread
                if args.preview:
                    cv2.imshow("Frame", bgrImage)
                    cv2.setWindowTitle("Frame", "Frame #{}".format(frameId))
                    cv2.waitKey(1)
        else:
            # Final optimized poses
            blurryImages = {}
            sparseObservations = {}
            # OrderedDict to avoid undefined iteration order = different output files for the same input
            sparsePointCloud = OrderedDict()

            blurriness = []
            for frameId in output.map.keyFrames:
                blurriness.append((frameId, blurScores.get(frameId, 1e6)))

            # Look a couple of images forward and backward; if the current frame is
            # the blurriest in that window, don't use it
            if args.blur_filter_range != 0:
                assert(args.blur_filter_range > 1)
                blur_filter_radius_lo = int(math.ceil((args.blur_filter_range - 1) * 0.5))
                blur_filter_radius_hi = int(math.floor((args.blur_filter_range - 1) * 0.5))
                print('blur filter range [-%d, %d)' % (blur_filter_radius_lo, blur_filter_radius_hi+1))
                for i in range(blur_filter_radius_lo, max(0, len(blurriness) - blur_filter_radius_hi)):
                    group = [blurriness[j+i] for j in range(-blur_filter_radius_lo, blur_filter_radius_hi+1)]
                    group.sort(key=lambda x: x[1])
                    cur = blurriness[i][0]
                    # after the ascending sort, the last element has the highest blur
                    # score, i.e., it is the blurriest frame in the window
                    if group[-1][0] == cur:
                        blurryImages[cur] = True
            trainingFrames = []
            validationFrames = []
            globalPointCloud = []
            index = 1 # start from 1 to match COLMAP/Nerfstudio frame numbering (fragile!)
            name = os.path.split(args.output)[-1]
            for frameId in output.map.keyFrames:
                if blurryImages.get(frameId):
                    print('skipping blurry frame %s' % str(frameId))
                    continue # Skip blurry images

                # Image and pose data
                keyFrame = output.map.keyFrames.get(frameId)
                targetFrame = keyFrame.frameSet.rgbFrame
                if not targetFrame: targetFrame = keyFrame.frameSet.primaryFrame
                cameraPose = targetFrame.cameraPose

                sparseObsForKeyFrame = []
                DEFAULT_POINT_COLOR = [128, 128, 128] # default: 50% gray
                for mpObs in targetFrame.sparseFeatures:
                    # keeping the native object is OK since it is not used after the callback
                    sparseObsForKeyFrame.append(mpObs)
                    sparsePointCloud[mpObs.id] = {
                        'position': [mpObs.position.x, mpObs.position.y, mpObs.position.z],
                        'color': sparsePointColors.get(mpObs.id, DEFAULT_POINT_COLOR)
                    }
                sparseObservations[frameId] = sparseObsForKeyFrame

                # Camera data
                vCam, vAngCam = compute_cam_velocities(targetFrame, keyFrame.angularVelocity)
                frame = {
                    "image_path": f"data/{name}/images/frame_{index:05}.{args.image_format}",
                    "T_pointcloud_camera": cameraPose.getCameraToWorldMatrix().tolist(), # 4x4 transformation matrix from camera to world (point cloud) coordinates
                    "camera_intrinsics": intrinsics.tolist(), # 3x3 camera intrinsics matrix K
                    "camera_linear_velocity": vCam.tolist(),
                    "camera_angular_velocity": vAngCam.tolist(),
                    "rolling_shutter_time": rollingShutterTime,
                    "motion_blur_score": blurScores.get(frameId, 1e6),
                    "exposure_time": exposureTime,
                    "camera_height": frameHeight, # image height, in pixels
                    "camera_width": frameWidth, # image width, in pixels
                    "camera_id": index # camera id, not used
                }
                if cameraDistortion is not None:
                    frame['camera_distortion'] = cameraDistortion

                oldImgName = f"{tmp_dir}/frame_{frameId:05}.{args.image_format}"
                newImgName = f"{args.output}/images/frame_{index:05}.{args.image_format}"
                shutil.move(oldImgName, newImgName)

                oldDepth = f"{tmp_dir}/depth_{frameId:05}.png"
                newDepth = f"{args.output}/images/depth_{index:05}.png"
                if os.path.exists(oldDepth):
                    shutil.move(oldDepth, newDepth)
                    frame['depth_image_path'] = f"data/{name}/images/depth_{index:05}.png"
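
                # Hold out roughly every 7th key frame for validation, the rest for training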
                if (index + 3) % 7 == 0:
                    validationFrames.append(frame)
                else:
                    trainingFrames.append(frame)

                if frameId in pointClouds:
                    # Pointcloud data
                    posData, colorData = pointClouds[frameId]
                    pc = np.vstack((posData.T, np.ones((1, posData.shape[0]))))
                    pc = (cameraPose.getCameraToWorldMatrix() @ pc)[:3, :].T
                    pc = np.hstack((pc, colorData))
                    globalPointCloud.extend(pc)

                index += 1
            data = [[pointId] + list(point['position']) + list(point['color']) for pointId, point in sparsePointCloud.items()]
            sparse_point_cloud_df = pd.DataFrame(
                data,
                columns=['id'] + list('xyzrgb'))
            for c in 'rgb': sparse_point_cloud_df[c] = sparse_point_cloud_df[c].astype(np.uint8)

            merged_df = post_process_point_clouds(
                globalPointCloud,
                sparse_point_cloud_df)

            # print(merged_df)
            if args.format == 'taichi':
                # merged_df.to_csv(f"{args.output}/points.merged-decimated.csv", index=False)
                merged_df[list('xyzrgb')].to_parquet(f"{args.output}/point_cloud.parquet")
                with open(f"{args.output}/train.json", "w") as outFile:
                    json.dump(trainingFrames, outFile, indent=2, sort_keys=True)
                with open(f"{args.output}/val.json", "w") as outFile:
                    json.dump(validationFrames, outFile, indent=2, sort_keys=True)
            elif args.format == 'nerfstudio':
                allFrames = trainingFrames + validationFrames
                with open(f"{args.output}/transforms.json", "w") as outFile:
                    json.dump(convert_json_taichi_to_nerfstudio(allFrames), outFile, indent=2, sort_keys=True)

                # colmap text point format
                fake_colmap = f"{args.output}/colmap/sparse/0"
                os.makedirs(fake_colmap, exist_ok=True)

                c_points, c_images, c_cameras = convert_json_taichi_to_colmap(allFrames, merged_df, sparseObservations, nerfstudio_fake_obs=True)

                def write_colmap_csv(data, fn):
                    with open(fn, 'wt') as f:
                        for row in data:
                            f.write(' '.join([str(c) for c in row])+'\n')

                # splatfacto point cloud format
                point_cloud_data_frame_to_ply(merged_df, f"{args.output}/sparse_pc.ply")
                write_colmap_csv(c_points, f"{fake_colmap}/points3D.txt")
                write_colmap_csv(c_images, f"{fake_colmap}/images.txt")
                write_colmap_csv(c_cameras, f"{fake_colmap}/cameras.txt")

            finalMapWritten = True
    def on_vio_output(vioOutput):
        nonlocal visualizer, isTracking
        wasTracking = isTracking
        isTracking = vioOutput.status == spectacularAI.TrackingStatus.TRACKING
        if wasTracking and not isTracking:
            print('warning: Lost tracking!')
        if visualizer is not None:
            visualizer.onVioOutput(vioOutput.getCameraPose(0), status=vioOutput.status)
    def on_mapping_output(output):
        try:
            process_mapping_output(output)
        except Exception as e:
            print(f"ERROR: {e}", flush=True)
            raise
    def is_already_rectified(input_dir):
        vioConfigYaml = f"{input_dir}/vio_config.yaml"
        if os.path.exists(vioConfigYaml):
            with open(vioConfigYaml) as file:
                for line in file:
                    if "alreadyRectified" in line:
                        _, value = line.split(":")
                        return value.lower().strip() == "true"
        return False
    def parse_input_dir(input_dir):
        cameras = None
        calibrationJson = f"{input_dir}/calibration.json"
        if os.path.exists(calibrationJson):
            with open(calibrationJson) as f:
                calibration = json.load(f)
                if "cameras" in calibration:
                    cameras = calibration["cameras"]

        device = None
        metadataJson = f"{input_dir}/metadata.json"
        if os.path.exists(metadataJson):
            with open(metadataJson) as f:
                metadata = json.load(f)
                if metadata.get("platform") == "ios":
                    device = "ios-tof"

        if device is None:
            vioConfigYaml = f"{input_dir}/vio_config.yaml"
            if os.path.exists(vioConfigYaml):
                with open(vioConfigYaml) as file:
                    supported = ['oak-d', 'k4a', 'realsense', 'orbbec-astra2', 'orbbec-femto', 'android', 'android-tof']
                    for line in file:
                        if "parameterSets" in line:
                            for d in supported:
                                if d in line:
                                    device = d
                                    break
                        if device: break
        return (device, cameras)
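
    # Base mapping configuration; device- and quality-specific parameter sets
    # are appended below before starting the replay.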
    config = {
        "maxMapSize": 0,
        "useSlam": True,
        "passthroughColorImages": True,
        "keyframeDecisionDistanceThreshold": args.key_frame_distance,
        "icpVoxelSize": min(args.key_frame_distance, 0.1)
    }

    parameter_sets = ['wrapper-base']

    tmp_dir = None
    if args.format in ['ply', 'pcd']:
        config["mapSavePath"] = args.output
        parameter_sets.append('point-cloud')
    elif args.format == 'obj':
        assert not args.mono
        config['recMeshSavePath'] = args.output
        config['recTexturize'] = args.texturize
        parameter_sets.append('meshing')
    else:
        # Clear the output dir
        shutil.rmtree(f"{args.output}/images", ignore_errors=True)
        os.makedirs(f"{args.output}/images", exist_ok=True)
        tmp_dir = tempfile.mkdtemp()

    device_preset, cameras = parse_input_dir(args.input)

    if cameras is not None:
        cam = cameras[0]
        exposureTime = cam.get('exposureTimeSeconds', 0)
        rollingShutterTime = cam.get('shutterRollTimeSeconds', 0)
        if args.no_undistort:
            cameraDistortion = convert_distortion(cam)

    useMono = args.mono or (cameras is not None and len(cameras) == 1)
    if useMono: config['useStereo'] = False
    prefer_icp = not args.no_icp and not useMono

    if not args.fast:
        parameter_sets.append('offline-base')
        # remove these to further trade off speed for quality
        mid_q = {
            'maxKeypoints': 1000,
            'optimizerMaxIterations': 30
        }
        for k, v in mid_q.items(): config[k] = v

    if args.device_preset:
        device_preset = args.device_preset

    if args.internal is not None:
        for param in args.internal:
            k, _, v = param.partition(':')
            config[k] = v

    if device_preset: print(f"Selected device type: {device_preset}", flush=True)
    else: print("Warning! Couldn't automatically detect the device preset; to ensure best results, supply one via the --device_preset argument", flush=True)
    if device_preset:
        parameter_sets.append(device_preset)

    if device_preset == 'k4a':
        if prefer_icp:
            parameter_sets.extend(['icp'])
            if not args.fast: parameter_sets.append('offline-icp')
    elif device_preset == 'realsense':
        if prefer_icp:
            parameter_sets.extend(['icp', 'realsense-icp'])
            if not args.fast: parameter_sets.append('offline-icp')
    elif device_preset == 'oak-d':
        config['stereoPointCloudMinDepth'] = 0.5
        config['alreadyRectified'] = is_already_rectified(args.input) # rectification required for stereo point cloud
    elif device_preset is not None and "orbbec" in device_preset:
        if prefer_icp:
            parameter_sets.extend(['icp'])
            if not args.fast: parameter_sets.append('offline-icp')
    if args.preview3d:
        from spectacularAI.cli.visualization.visualizer import Visualizer, VisualizerArgs
        visArgs = VisualizerArgs()
        visArgs.targetFps = 30
        visArgs.showCameraModel = False
        visualizer = Visualizer(visArgs)

    config['parameterSets'] = parameter_sets
    print(config)

    replay = spectacularAI.Replay(args.input, mapperCallback = on_mapping_output, configuration = config, ignoreFolderConfiguration = True)
    replay.setOutputCallback(on_vio_output)

    try:
        if visualizer is None:
            replay.runReplay()
        else:
            replay.startReplay()
            visualizer.run()
            replay.close()
    except Exception as e:
        print(f"Something went wrong! {e}", flush=True)
        raise e

    replay = None
    if tmp_dir is not None:
        try:
            shutil.rmtree(tmp_dir)
        except Exception:
            print(f"Failed to clean the temporary directory; you can delete these files manually, they are no longer required: {tmp_dir}", flush=True)

    if not finalMapWritten:
        print('Mapping failed: no output generated')
        exit(1)

    print("Done!\n", flush=True)

    if args.format == 'taichi':
        name = os.path.split(args.output)[-1]
        print("You should use the following paths in the taichi_3d_gaussian_splatting config file:", flush=True)
        print(f"pointcloud-parquet-path: 'data/{name}/point_cloud.parquet'", flush=True)
        print(f"summary-writer-log-dir: data/{name}/logs", flush=True)
        print(f"output-model-dir: data/{name}/output", flush=True)
        print(f"train-dataset-json-path: 'data/{name}/train.json'", flush=True)
        print(f"val-dataset-json-path: 'data/{name}/val.json'", flush=True)
    else:
        print(f'output written to {args.output}', flush=True)
if __name__ == '__main__':
    def parse_args():
        import argparse
        parser = argparse.ArgumentParser(description=__doc__.strip())
        parser = define_args(parser)
        return parser.parse_args()

    process(parse_args())