-
Notifications
You must be signed in to change notification settings - Fork 0
/
references.bib
603 lines (548 loc) · 38.1 KB
/
references.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
@inproceedings{su2015multi,
  title     = {Multi-view convolutional neural networks for {3D} shape recognition},
  author    = {Su, Hang and Maji, Subhransu and Kalogerakis, Evangelos and Learned-Miller, Erik},
  booktitle = {Proceedings of the IEEE international conference on computer vision},
  year      = {2015}
}
@article{zoph2016neural,
  title   = {Neural architecture search with reinforcement learning},
  author  = {Zoph, Barret and Le, Quoc V},
  journal = {arXiv preprint arXiv:1611.01578},
  year    = {2016}
}
@inproceedings{wu20153d,
  title     = {{3D} shapenets: A deep representation for volumetric shapes},
  author    = {Wu, Zhirong and Song, Shuran and Khosla, Aditya and Yu, Fisher and Zhang, Linguang and Tang, Xiaoou and Xiao, Jianxiong},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {1912--1920},
  year      = {2015}
}
@inproceedings{kanezaki2018_rotationnet,
  title     = {{RotationNet}: Joint Object Categorization and Pose Estimation Using Multiviews from Unsupervised Viewpoints},
  author    = {Kanezaki, Asako and Matsushita, Yasuyuki and Nishida, Yoshifumi},
  booktitle = {Proc.\ of IEEE International Conference on Computer Vision and Pattern Recognition (CVPR)},
  year      = {2018},
}
@article{brock2016generative,
  title   = {Generative and discriminative voxel modeling with convolutional neural networks},
  author  = {Brock, Andrew and Lim, Theodore and Ritchie, James M and Weston, Nick},
  journal = {arXiv preprint arXiv:1608.04236},
  year    = {2016}
}
@article{munozextending,
  title  = {Extending {GG-CNN} through Automated Model Space Exploration using Knowledge Transfer},
  author = {Mu{\~n}oz, Mario R{\'\i}os and Schomaker, Lambert and Kasaei, S Hamidreza},
  internal-note = {NOTE(review): @article is missing required fields journal and year -- confirm venue/year, or reclassify as a @misc/preprint entry}
}
@inproceedings{morrison2018closing,
  title     = {Closing the Loop for Robotic Grasping: A Real-time, Generative Grasp Synthesis Approach},
  author    = {Morrison, Douglas and Corke, Peter and Leitner, J{\"u}rgen},
  booktitle = {Proc.\ of Robotics: Science and Systems (RSS)},
  year      = {2018}
}
@inproceedings{sift,
  title        = {Object recognition from local scale-invariant features},
  author       = {Lowe, David G},
  booktitle    = {Proceedings of the seventh IEEE international conference on computer vision},
  volume       = {2},
  pages        = {1150--1157},
  year         = {1999},
  organization = {IEEE}
}
@article{canny,
  title     = {A computational approach to edge detection},
  author    = {Canny, John},
  journal   = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  number    = {6},
  pages     = {679--698},
  year      = {1986},
  publisher = {IEEE}
}
@inproceedings{surf,
  title        = {{SURF}: Speeded up robust features},
  author       = {Bay, Herbert and Tuytelaars, Tinne and Van Gool, Luc},
  booktitle    = {European conference on computer vision},
  pages        = {404--417},
  year         = {2006},
  organization = {Springer}
}
@article{vgg,
  title   = {Very deep convolutional networks for large-scale image recognition},
  author  = {Simonyan, Karen and Zisserman, Andrew},
  journal = {arXiv preprint arXiv:1409.1556},
  year    = {2014}
}
@article{mobilenet,
  title   = {{MobileNets}: Efficient convolutional neural networks for mobile vision applications},
  author  = {Howard, Andrew G and Zhu, Menglong and Chen, Bo and Kalenichenko, Dmitry and Wang, Weijun and Weyand, Tobias and Andreetto, Marco and Adam, Hartwig},
  journal = {arXiv preprint arXiv:1704.04861},
  year    = {2017}
}
@inproceedings{resnet,
  title     = {Deep residual learning for image recognition},
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {770--778},
  year      = {2016}
}
@inproceedings{imagenet,
  author    = {Deng, J. and Dong, W. and Socher, R. and Li, L.-J. and Li, K. and Fei-Fei, L.},
  title     = {{ImageNet}: A Large-Scale Hierarchical Image Database},
  booktitle = {CVPR09},
  year      = {2009},
  bibsource = {http://www.image-net.org/papers/imagenet_cvpr09.bib}
}
@inproceedings{voxnet,
  title        = {{VoxNet}: A {3D} convolutional neural network for real-time object recognition},
  author       = {Maturana, Daniel and Scherer, Sebastian},
  booktitle    = {2015 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)},
  year         = {2015},
  organization = {IEEE}
}
@inproceedings{kasaei2018perceiving,
  title     = {Perceiving, learning, and recognizing {3D} objects: An approach to cognitive service robots},
  author    = {Kasaei, S and Sock, Juil and Lopes, Luis Seabra and Tom{\'e}, Ana Maria and Kim, Tae-Kyun},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  year      = {2018},
  internal-note = {NOTE(review): first author given only as "Kasaei, S" -- elsewhere in this file the same author appears as "Kasaei, S Hamidreza"; confirm and normalise}
}
@article{fpnn,
  title   = {{FPNN}: Field probing neural networks for {3D} data},
  author  = {Li, Yangyan and Pirk, Soeren and Su, Hao and Qi, Charles R and Guibas, Leonidas J},
  journal = {arXiv preprint arXiv:1605.06240},
  year    = {2016}
}
@article{3dgan,
  title   = {Learning a probabilistic latent space of object shapes via {3D} generative-adversarial modeling},
  author  = {Wu, Jiajun and Zhang, Chengkai and Xue, Tianfan and Freeman, William T and Tenenbaum, Joshua B},
  journal = {arXiv preprint arXiv:1610.07584},
  year    = {2016}
}
@inproceedings{savarese20073d,
  title        = {{3D} generic object categorization, localization and pose estimation},
  author       = {Savarese, Silvio and Fei-Fei, Li},
  booktitle    = {2007 IEEE 11th International Conference on Computer Vision},
  pages        = {1--8},
  year         = {2007},
  organization = {IEEE}
}
@inproceedings{lai2011scalable,
  title     = {A scalable tree-based approach for joint object and pose recognition},
  author    = {Lai, Kevin and Bo, Liefeng and Ren, Xiaofeng and Fox, Dieter},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  year      = {2011}
}
@inproceedings{Sock_2017_ICCV,
  author    = {Sock, Juil and Hamidreza Kasaei, S. and Seabra Lopes, Luis and Kim, Tae-Kyun},
  title     = {Multi-View {6D} Object Pose Estimation and Camera Motion Planning Using RGBD Images},
  booktitle = {Proceedings of the IEEE International Conference on Computer Vision (ICCV) Workshops},
  year      = {2017}
}
@inproceedings{zhang2013joint,
  title     = {Joint object and pose recognition using homeomorphic manifold analysis},
  author    = {Zhang, Haopeng and El-Gaaly, Tarek and Elgammal, Ahmed and Jiang, Zhiguo},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  year      = {2013}
}
@inproceedings{bakry2014untangling,
  title        = {Untangling object-view manifold for multiview recognition and pose estimation},
  author       = {Bakry, Amr and Elgammal, Ahmed},
  booktitle    = {European conference on computer vision},
  pages        = {434--449},
  year         = {2014},
  organization = {Springer}
}
@inproceedings{qi2016volumetric,
  title     = {Volumetric and multi-view {CNN}s for object classification on {3D} data},
  author    = {Qi, Charles R and Su, Hao and Nie{\ss}ner, Matthias and Dai, Angela and Yan, Mengyuan and Guibas, Leonidas J},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {5648--5656},
  year      = {2016}
}
@inproceedings{elhoseiny2016comparative,
  title        = {A comparative analysis and study of multiview {CNN} models for joint object categorization and pose estimation},
  author       = {Elhoseiny, Mohamed and El-Gaaly, Tarek and Bakry, Amr and Elgammal, Ahmed},
  booktitle    = {International Conference on Machine Learning},
  pages        = {888--897},
  year         = {2016},
  organization = {PMLR}
}
@inproceedings{kuznetsova2016exploiting,
  title     = {Exploiting view-specific appearance similarities across classes for zero-shot pose prediction: A metric learning approach},
  author    = {Kuznetsova, Alina and Hwang, Sung Ju and Rosenhahn, Bodo and Sigal, Leonid},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume    = {30},
  number    = {1},
  year      = {2016}
}
@inproceedings{dutagaci2010benchmark,
  title     = {A benchmark for best view selection of {3D} objects},
  author    = {Dutagaci, Helin and Cheung, Chun Pan and Godil, Afzal},
  booktitle = {Proceedings of the ACM workshop on {3D} object retrieval},
  pages     = {45--50},
  year      = {2010}
}
@article{polonsky2005s,
  title     = {What's in an image?},
  author    = {Polonsky, Oleg and Patan{\'e}, Giuseppe and Biasotti, Silvia and Gotsman, Craig and Spagnuolo, Michela},
  journal   = {The Visual Computer},
  volume    = {21},
  number    = {8},
  pages     = {840--847},
  year      = {2005},
  publisher = {Springer}
}
@inproceedings{vazquez2001viewpoint,
  title        = {Viewpoint selection using viewpoint entropy},
  author       = {V{\'a}zquez, Pere-Pau and Feixas, Miquel and Sbert, Mateu and Heidrich, Wolfgang},
  booktitle    = {VMV},
  volume       = {1},
  pages        = {273--280},
  year         = {2001},
  organization = {Citeseer}
}
@inproceedings{page2003shape,
  title        = {Shape analysis algorithm based on information theory},
  author       = {Page, David L and Koschan, Andreas F and Sukumar, Sreenivas R and Roui-Abidi, Besma and Abidi, Mongi A},
  booktitle    = {Proceedings 2003 International Conference on Image Processing (Cat. No. 03CH37429)},
  volume       = {1},
  pages        = {I--229},
  year         = {2003},
  organization = {IEEE}
}
@incollection{lee2005mesh,
  title     = {Mesh saliency},
  author    = {Lee, Chang Ha and Varshney, Amitabh and Jacobs, David W},
  booktitle = {ACM SIGGRAPH 2005 Papers},
  pages     = {659--666},
  year      = {2005}
}
@article{zhou2018,
  author  = {Zhou, Qian-Yi and Park, Jaesik and Koltun, Vladlen},
  title   = {{Open3D}: {A} Modern Library for {3D} Data Processing},
  journal = {arXiv preprint arXiv:1801.09847},
  year    = {2018},
}
@article{li2017hyperband,
  title     = {Hyperband: A novel bandit-based approach to hyperparameter optimization},
  author    = {Li, Lisha and Jamieson, Kevin and DeSalvo, Giulia and Rostamizadeh, Afshin and Talwalkar, Ameet},
  journal   = {The Journal of Machine Learning Research},
  volume    = {18},
  number    = {1},
  pages     = {6765--6816},
  year      = {2017},
  publisher = {JMLR.org}
}
@inproceedings{sandler2018mobilenetv2,
  title     = {{MobileNetV2}: Inverted residuals and linear bottlenecks},
  author    = {Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {4510--4520},
  year      = {2018}
}
@inproceedings{efficientnet,
  title        = {{EfficientNet}: Rethinking model scaling for convolutional neural networks},
  author       = {Tan, Mingxing and Le, Quoc},
  booktitle    = {International Conference on Machine Learning},
  pages        = {6105--6114},
  year         = {2019},
  organization = {PMLR}
}
@article{adam,
  title   = {Adam: A method for stochastic optimization},
  author  = {Kingma, Diederik P and Ba, Jimmy},
  journal = {arXiv preprint arXiv:1412.6980},
  year    = {2014}
}
@inproceedings{pepik20153d,
  title     = {{3D} object class detection in the wild},
  author    = {Pepik, Bojan and Stark, Michael and Gehler, Peter and Ritschel, Tobias and Schiele, Bernt},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops},
  pages     = {1--10},
  year      = {2015}
}
@article{teney2014multiview,
  title     = {Multiview feature distributions for object detection and continuous pose estimation},
  author    = {Teney, Damien and Piater, Justus},
  journal   = {Computer Vision and Image Understanding},
  volume    = {125},
  pages     = {265--282},
  year      = {2014},
  publisher = {Elsevier}
}
@inproceedings{liu2019densepoint,
  title     = {{DensePoint}: Learning densely contextual representation for efficient point cloud processing},
  author    = {Liu, Yongcheng and Fan, Bin and Meng, Gaofeng and Lu, Jiwen and Xiang, Shiming and Pan, Chunhong},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision},
  pages     = {5239--5248},
  year      = {2019}
}
@article{micikevicius2017mixed,
  title   = {Mixed precision training},
  author  = {Micikevicius, Paulius and Narang, Sharan and Alben, Jonah and Diamos, Gregory and Elsen, Erich and Garcia, David and Ginsburg, Boris and Houston, Michael and Kuchaiev, Oleksii and Venkatesh, Ganesh and others},
  journal = {arXiv preprint arXiv:1710.03740},
  year    = {2017}
}
@inproceedings{kumawat2019lp,
  title     = {{LP-3DCNN}: Unveiling local phase in {3D} convolutional neural networks},
  author    = {Kumawat, Sudhakar and Raman, Shanmuganathan},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages     = {4903--4912},
  year      = {2019},
  internal-note = {Duplicate of entry kumawat_lp-3dcnn_2019 in this file -- consolidate to a single key}
}
@misc{liu2019relationshape,
  title         = {Relation-Shape Convolutional Neural Network for Point Cloud Analysis},
  author        = {Liu, Yongcheng and Fan, Bin and Xiang, Shiming and Pan, Chunhong},
  year          = {2019},
  eprint        = {1904.07601},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
  internal-note = {Duplicate of entry liu_relation-shape_2019 in this file -- consolidate to a single key}
}
@inproceedings{jiang2019mlvcnn,
  title     = {{MLVCNN}: Multi-loop-view convolutional neural network for {3D} shape retrieval},
  author    = {Jiang, Jianwen and Bao, Di and Chen, Ziqiang and Zhao, Xibin and Gao, Yue},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume    = {33},
  number    = {01},
  pages     = {8513--8520},
  year      = {2019},
  internal-note = {Duplicate of entry jiang_mlvcnn_2019 in this file -- consolidate to a single key}
}
@article{zhang2018inductive,
  title     = {Inductive multi-hypergraph learning and its application on view-based {3D} object classification},
  author    = {Zhang, Zizhao and Lin, Haojie and Zhao, Xibin and Ji, Rongrong and Gao, Yue},
  journal   = {IEEE Transactions on Image Processing},
  volume    = {27},
  number    = {12},
  pages     = {5957--5968},
  year      = {2018},
  publisher = {IEEE},
  internal-note = {Duplicate of entry zhang_inductive_2018 in this file -- consolidate to a single key}
}
@article{thrun2002probabilistic,
  title     = {Probabilistic robotics},
  author    = {Thrun, Sebastian},
  journal   = {Communications of the ACM},
  volume    = {45},
  number    = {3},
  pages     = {52--57},
  year      = {2002},
  publisher = {ACM New York, NY, USA}
}
@article{kumawat_lp-3dcnn_2019,
title = {{LP}-{3DCNN}: {Unveiling} {Local} {Phase} in {3D} {Convolutional} {Neural} {Networks}},
shorttitle = {{LP}-{3DCNN}},
url = {http://arxiv.org/abs/1904.03498},
abstract = {Traditional 3D Convolutional Neural Networks (CNNs) are computationally expensive, memory intensive, prone to overfit, and most importantly, there is a need to improve their feature learning capabilities. To address these issues, we propose Rectified Local Phase Volume (ReLPV) block, an efficient alternative to the standard 3D convolutional layer. The ReLPV block extracts the phase in a 3D local neighborhood (e.g., 3 × 3 × 3) of each position of the input map to obtain the feature maps. The phase is extracted by computing 3D Short Term Fourier Transform (STFT) at multiple fixed low frequency points in the 3D local neighborhood of each position. These feature maps at different frequency points are then linearly combined after passing them through an activation function. The ReLPV block provides significant parameter savings of at least, 33 to 133 times compared to the standard 3D convolutional layer with the filter sizes 3 × 3 × 3 to 13 × 13 × 13, respectively. We show that the feature learning capabilities of the ReLPV block are significantly better than the standard 3D convolutional layer. Furthermore, it produces consistently better results across different 3D data representations. We achieve state-of-the-art accuracy on the volumetric ModelNet10 and ModelNet40 datasets while utilizing only 11\% parameters of the current state-of-theart. We also improve the state-of-the-art on the UCF-101 split-1 action recognition dataset by 5.68\% (when trained from scratch) while using only 15\% of the parameters of the state-of-the-art. The project webpage is available at https://sites.google.com/view/lp-3dcnn/home.},
language = {en},
urldate = {2022-05-10},
journal = {arXiv:1904.03498 [cs]},
author = {Kumawat, Sudhakar and Raman, Shanmuganathan},
month = apr,
year = {2019},
note = {arXiv: 1904.03498},
keywords = {Computer Science - Computer Vision and Pattern Recognition},
file = {Kumawat and Raman - 2019 - LP-3DCNN Unveiling Local Phase in 3D Convolutiona.pdf:/home/erago/Zotero/storage/T8PNMNJI/Kumawat and Raman - 2019 - LP-3DCNN Unveiling Local Phase in 3D Convolutiona.pdf:application/pdf},
internal-note = {NOTE(review): duplicate of entry kumawat2019lp in this file; abstract contains Unicode characters (×) that classic 8-bit BibTeX may mishandle -- confirm Biber/biblatex is the processor},
}
@article{zhang_inductive_2018,
title = {Inductive {Multi}-{Hypergraph} {Learning} and {Its} {Application} on {View}-{Based} {3D} {Object} {Classification}},
volume = {27},
issn = {1057-7149, 1941-0042},
url = {https://ieeexplore.ieee.org/document/8424480/},
doi = {10.1109/TIP.2018.2862625},
abstract = {The wide 3D applications have led to increasing amount of 3D object data, and thus effective 3D object classification technique has become urgent requirement. One important and challenging task for 3D object classification is how to formulate the 3D data correlation and exploit it. Most of previous works focus on learning optimal pairwise distance metric for object comparison, which may lose the global correlation among 3D objects. Recently, transductive hypergraph learning has been investigated for classification, which can jointly explore the correlation among multiple objects, including both the labeled and unlabeled data. Although these methods have shown better performance, they are still limited due to 1) a considerable amount of testing data may not be available in practice and 2) the high computational cost to test new coming data. To handle this problem, considering the multimodal representations of 3D objects in practice, we propose an inductive multi-hypergraph learning algorithm, which targets on learning an optimal projection for the multi-modal training data. In this method, all the training data are formulated in multihypergraph based on the features, and the inductive learning is conducted to learn the projection matrices and the optimal multi-hypergraph combination weights simultaneously. Different from the transductive learning on hypergraph, the high cost training process is off-line, and the testing process is very efficient for the inductive learning on hypergraph. We have conducted experiments on two 3D benchmarks, i.e., the NTU and the ModelNet40 datasets, and compared the proposed algorithm with the state-of-the-art methods and traditional transductive multi-hypergraph learning methods. Experimental results have demonstrated that the proposed method can achieve effective and efficient classification performance. 
We also note that the proposed method is a general framework and has the potential to be applied in other applications in practice.},
language = {en},
number = {12},
urldate = {2022-05-10},
journal = {IEEE Transactions on Image Processing},
author = {Zhang, Zizhao and Lin, Haojie and Zhao, Xibin and Ji, Rongrong and Gao, Yue},
month = dec,
year = {2018},
pages = {5957--5968},
file = {inductive-multihypergraph-learning-and-its-application-on-viewba-2018.pdf:/home/erago/Zotero/storage/T8PNMNJI/inductive-multihypergraph-learning-and-its-application-on-viewba-2018.pdf:application/pdf},
internal-note = {NOTE(review): duplicate of entry zhang2018inductive in this file -- consolidate to a single key},
}
@article{liu_relation-shape_2019,
title = {Relation-{Shape} {Convolutional} {Neural} {Network} for {Point} {Cloud} {Analysis}},
url = {http://arxiv.org/abs/1904.07601},
abstract = {Point cloud analysis is very challenging, as the shape implied in irregular points is difficult to capture. In this paper, we propose RS-CNN, namely, Relation-Shape Convolutional Neural Network, which extends regular grid CNN to irregular configuration for point cloud analysis. The key to RS-CNN is learning from relation, i.e., the geometric topology constraint among points. Specifically, the convolutional weight for local point set is forced to learn a high-level relation expression from predefined geometric priors, between a sampled point from this point set and the others. In this way, an inductive local representation with explicit reasoning about the spatial layout of points can be obtained, which leads to much shape awareness and robustness. With this convolution as a basic operator, RS-CNN, a hierarchical architecture can be developed to achieve contextual shape-aware learning for point cloud analysis. Extensive experiments on challenging benchmarks across three tasks verify RS-CNN achieves the state of the arts.},
urldate = {2022-05-10},
journal = {arXiv:1904.07601 [cs]},
author = {Liu, Yongcheng and Fan, Bin and Xiang, Shiming and Pan, Chunhong},
month = may,
year = {2019},
note = {arXiv: 1904.07601},
keywords = {Computer Science - Computer Vision and Pattern Recognition, Computer Science - Artificial Intelligence, Computer Science - Computational Geometry, Computer Science - Graphics, Computer Science - Robotics},
file = {arXiv Fulltext PDF:/home/erago/Zotero/storage/MNFJHGTQ/Liu et al. - 2019 - Relation-Shape Convolutional Neural Network for Po.pdf:application/pdf;arXiv.org Snapshot:/home/erago/Zotero/storage/2M4C8Z3Y/1904.html:text/html},
internal-note = {NOTE(review): duplicate of entry liu2019relationshape in this file -- consolidate to a single key},
}
@article{zhang_linked_2019,
title = {Linked {Dynamic} {Graph} {CNN}: {Learning} on {Point} {Cloud} via {Linking} {Hierarchical} {Features}},
shorttitle = {Linked {Dynamic} {Graph} {CNN}},
url = {http://arxiv.org/abs/1904.10014},
abstract = {Learning on point cloud is eagerly in demand because the point cloud is a common type of geometric data and can aid robots to understand environments robustly. However, the point cloud is sparse, unstructured, and unordered, which cannot be recognized accurately by a traditional convolutional neural network (CNN) nor a recurrent neural network (RNN). Fortunately, a graph convolutional neural network (Graph CNN) can process sparse and unordered data. Hence, we propose a linked dynamic graph CNN (LDGCNN) to classify and segment point cloud directly in this paper. We remove the transformation network, link hierarchical features from dynamic graphs, freeze feature extractor, and retrain the classifier to increase the performance of LDGCNN. We explain our network using theoretical analysis and visualization. Through experiments, we show that the proposed LDGCNN achieves state-of-art performance on two standard datasets: ModelNet40 and ShapeNet.},
urldate = {2022-05-10},
journal = {arXiv:1904.10014 [cs]},
author = {Zhang, Kuangen and Hao, Ming and Wang, Jing and de Silva, Clarence W. and Fu, Chenglong},
month = aug,
year = {2019},
note = {arXiv: 1904.10014},
keywords = {Computer Science - Computer Vision and Pattern Recognition},
file = {arXiv Fulltext PDF:/home/erago/Zotero/storage/VE87R4GD/Zhang et al. - 2019 - Linked Dynamic Graph CNN Learning on Point Cloud .pdf:application/pdf;arXiv.org Snapshot:/home/erago/Zotero/storage/XBNQ44BK/1904.html:text/html},
internal-note = {NOTE(review): Zotero export; file field references a machine-local path -- ignored by styles, but not portable},
}
@inproceedings{khan_unsupervised_2019,
address = {Long Beach, CA, USA},
title = {Unsupervised {Primitive} {Discovery} for {Improved} {3D} {Generative} {Modeling}},
isbn = {978-1-72813-293-8},
url = {https://ieeexplore.ieee.org/document/8954393/},
doi = {10.1109/CVPR.2019.00997},
language = {en},
urldate = {2022-05-10},
booktitle = {2019 {IEEE}/{CVF} {Conference} on {Computer} {Vision} and {Pattern Recognition} (CVPR)},
publisher = {IEEE},
author = {Khan, Salman H. and Guo, Yulan and Hayat, Munawar and Barnes, Nick},
month = jun,
year = {2019},
pages = {9731--9740},
file = {Khan et al. - 2019 - Unsupervised Primitive Discovery for Improved 3D G.pdf:/home/erago/Zotero/storage/WTJN3BVS/Khan et al. - 2019 - Unsupervised Primitive Discovery for Improved 3D G.pdf:application/pdf},
}
@article{jiang_mlvcnn_2019,
title = {{MLVCNN}: {Multi}-{Loop}-{View} {Convolutional} {Neural} {Network} for {3D} {Shape} {Retrieval}},
volume = {33},
issn = {2374-3468, 2159-5399},
shorttitle = {{MLVCNN}},
url = {http://aaai.org/ojs/index.php/AAAI/article/view/4869},
doi = {10.1609/aaai.v33i01.33018513},
abstract = {3D shape retrieval has attracted much attention and become a hot topic in computer vision field recently.With the development of deep learning, 3D shape retrieval has also made great progress and many view-based methods have been introduced in recent years. However, how to represent 3D shapes better is still a challenging problem. At the same time, the intrinsic hierarchical associations among views still have not been well utilized. In order to tackle these problems, in this paper, we propose a multi-loop-view convolutional neural network (MLVCNN) framework for 3D shape retrieval. In this method, multiple groups of views are extracted from different loop directions first. Given these multiple loop views, the proposed MLVCNN framework introduces a hierarchical view-loop-shape architecture, i.e., the view level, the loop level, and the shape level, to conduct 3D shape representation from different scales. In the view-level, a convolutional neural network is first trained to extract view features. Then, the proposed Loop Normalization and LSTM are utilized for each loop of view to generate the loop-level features, which considering the intrinsic associations of the different views in the same loop. Finally, all the loop-level descriptors are combined into a shape-level descriptor for 3D shape representation, which is used for 3D shape retrieval. Our proposed method has been evaluated on the public 3D shape benchmark, i.e., ModelNet40. Experiments and comparisons with the state-of-the-art methods show that the proposed MLVCNN method can achieve significant performance improvement on 3D shape retrieval tasks. Our MLVCNN outperforms the state-of-the-art methods by the mAP of 4.84\% in 3D shape retrieval task. We have also evaluated the performance of the proposed method on the 3D shape classification task where MLVCNN also achieves superior performance compared with recent methods.},
language = {en},
urldate = {2022-05-10},
journal = {Proceedings of the AAAI Conference on Artificial Intelligence},
author = {Jiang, Jianwen and Bao, Di and Chen, Ziqiang and Zhao, Xibin and Gao, Yue},
month = jul,
year = {2019},
pages = {8513--8520},
file = {Jiang et al. - 2019 - MLVCNN Multi-Loop-View Convolutional Neural Netwo.pdf:/home/erago/Zotero/storage/HFBXJNGW/Jiang et al. - 2019 - MLVCNN Multi-Loop-View Convolutional Neural Netwo.pdf:application/pdf},
internal-note = {NOTE(review): duplicate of entry jiang2019mlvcnn in this file -- consolidate to a single key},
}
@article{han_3d2seqviews_2019,
title = {{3D2SeqViews}: {Aggregating} {Sequential} {Views} for {3D} {Global} {Feature} {Learning} by {CNN} {With} {Hierarchical} {Attention} {Aggregation}},
volume = {28},
issn = {1057-7149, 1941-0042},
shorttitle = {{3D2SeqViews}},
url = {https://ieeexplore.ieee.org/document/8666059/},
doi = {10.1109/TIP.2019.2904460},
language = {en},
number = {8},
urldate = {2022-05-10},
journal = {IEEE Transactions on Image Processing},
author = {Han, Zhizhong and Lu, Honglei and Liu, Zhenbao and Vong, Chi-Man and Liu, Yu-Shen and Zwicker, Matthias and Han, Junwei and Chen, C. L. Philip},
month = aug,
year = {2019},
pages = {3986--3999},
file = {Han et al. - 2019 - 3D2SeqViews Aggregating Sequential Views for 3D G.pdf:/home/erago/Zotero/storage/LMY7ERRG/Han et al. - 2019 - 3D2SeqViews Aggregating Sequential Views for 3D G.pdf:application/pdf},
internal-note = {NOTE(review): Zotero export; file field references a machine-local path -- ignored by styles, but not portable},
}
@inproceedings{cheraghian_3dcapsule_2019,
address = {Waikoloa Village, HI, USA},
title = {{3DCapsule}: {Extending} the {Capsule} {Architecture} to {Classify} {3D} {Point} {Clouds}},
isbn = {978-1-72811-975-5},
shorttitle = {{3DCapsule}},
url = {https://ieeexplore.ieee.org/document/8658405/},
doi = {10.1109/WACV.2019.00132},
abstract = {This paper introduces the 3DCapsule, which is a 3D extension of the recently introduced Capsule concept that makes it applicable to unordered point sets. The original Capsule relies on the existence of a spatial relationship between the elements in the feature map it is presented with, whereas in point permutation invariant formulations of 3D point set classification methods, such relationships are typically lost. Here, a new layer called ComposeCaps is introduced that, in lieu of a spatially relevant feature mapping, learns a new mapping that can be exploited by the 3DCapsule. Previous works in the 3D point set classification domain have focused on other parts of the architecture, whereas instead, the 3DCapsule is a drop-in replacement of the commonly used fully connected classifier. It is demonstrated via an ablation study, that when the 3DCapsule is applied to recent 3D point set classification architectures, it consistently shows an improvement, in particular when subjected to noisy data. Similarly, the ComposeCaps layer is evaluated and demonstrates an improvement over the baseline. In an apples-to-apples comparison against state-ofthe-art methods, again, better performance is demonstrated by the 3DCapsule.},
language = {en},
urldate = {2022-05-10},
booktitle = {2019 {IEEE} {Winter} {Conference} on {Applications} of {Computer} {Vision} ({WACV})},
publisher = {IEEE},
author = {Cheraghian, Ali and Petersson, Lars},
month = jan,
year = {2019},
pages = {1194--1202},
file = {Cheraghian and Petersson - 2019 - 3DCapsule Extending the Capsule Architecture to C.pdf:/home/erago/Zotero/storage/4H6GV4V8/Cheraghian and Petersson - 2019 - 3DCapsule Extending the Capsule Architecture to C.pdf:application/pdf},
internal-note = {NOTE(review): Zotero export; file field references a machine-local path -- ignored by styles, but not portable},
}
@article{kanezaki_rotationnet_2021,
  author   = {Kanezaki, Asako and Matsushita, Yasuyuki and Nishida, Yoshifumi},
  title    = {{RotationNet} for {Joint} {Object} {Categorization} and {Unsupervised} {Pose} {Estimation} from {Multi}-{View} {Images}},
  journal  = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  volume   = {43},
  number   = {1},
  pages    = {269--283},
  month    = jan,
  year     = {2021},
  issn     = {0162-8828, 2160-9292, 1939-3539},
  doi      = {10.1109/TPAMI.2019.2922640},
  url      = {https://ieeexplore.ieee.org/document/8736864/},
  urldate  = {2022-05-10},
  language = {en},
  abstract = {We propose a Convolutional Neural Network (CNN)-based model “RotationNet,” which takes multi-view images of an object as input and jointly estimates its pose and object category. Unlike previous approaches that use known viewpoint labels for training, our method treats the viewpoint labels as latent variables, which are learned in an unsupervised manner during the training using an unaligned object dataset. RotationNet uses only a partial set of multi-view images for inference, and this property makes it useful in practical scenarios where only partial views are available. Moreover, our pose alignment strategy enables one to obtain view-specific feature representations shared across classes, which is important to maintain high accuracy in both object categorization and pose estimation. Effectiveness of RotationNet is demonstrated by its superior performance to the state-of-the-art methods of 3D object classification on 10- and 40-class ModelNet datasets. We also show that RotationNet, even trained without known poses, achieves comparable performance to the state-of-the-art methods on an object pose estimation dataset. Furthermore, our object ranking method based on classification by RotationNet achieved the first prize in two tracks of the 3D Shape Retrieval Contest (SHREC) 2017. Finally, we demonstrate the performance of real-world applications of RotationNet trained with our newly created multi-view image dataset using a moving USB camera.},
  file     = {Kanezaki et al. - 2021 - RotationNet for Joint Object Categorization and Un.pdf:/home/erago/Zotero/storage/JY98IMEX/Kanezaki et al. - 2021 - RotationNet for Joint Object Categorization and Un.pdf:application/pdf},
}
@article{wang_normalnet_2019,
  author     = {Wang, Cheng and Cheng, Ming and Sohel, Ferdous and Bennamoun, Mohammed and Li, Jonathan},
  title      = {{NormalNet}: {A} voxel-based {CNN} for {3D} object classification and retrieval},
  shorttitle = {{NormalNet}},
  journal    = {Neurocomputing},
  volume     = {323},
  pages      = {139--147},
  month      = jan,
  year       = {2019},
  issn       = {0925-2312},
  doi        = {10.1016/j.neucom.2018.09.075},
  url        = {https://linkinghub.elsevier.com/retrieve/pii/S0925231218311561},
  urldate    = {2022-05-10},
  language   = {en},
  file       = {Wang et al. - 2019 - NormalNet A voxel-based CNN for 3D object classif.pdf:/home/erago/Zotero/storage/WM22HW55/Wang et al. - 2019 - NormalNet A voxel-based CNN for 3D object classif.pdf:application/pdf},
}
@article{xuan_mv-c3d_2019,
  author     = {Xuan, Qi and Li, Fuxian and Liu, Yi and Xiang, Yun},
  title      = {{MV}-{C3D}: {A} {Spatial} {Correlated} {Multi}-{View} {3D} {Convolutional} {Neural} {Networks}},
  shorttitle = {{MV}-{C3D}},
  journal    = {IEEE Access},
  volume     = {7},
  pages      = {92528--92538},
  year       = {2019},
  issn       = {2169-3536},
  doi        = {10.1109/ACCESS.2019.2923022},
  abstract   = {As the development of deep neural networks, 3D object recognition is becoming increasingly popular in the computer vision community. Many multi-view-based methods are proposed to improve the category recognition accuracy. These approaches mainly rely on multi-view images that are rendered with the whole circumference. In real-world applications, however, 3D objects are mostly observed from partial viewpoints in a less range. Therefore, we propose a multi-view-based 3D convolutional neural network that takes only part of contiguous multi-view images as input and can still maintain high accuracy. Moreover, our model takes these view images as a joint variable to better learn the spatially correlated features using 3D convolution and 3D max-pooling layers. The experimental results on ModelNet10 and ModelNet40 datasets show that our MV-C3D technique can achieve outstanding performance with multi-view images that are captured from partial angles with less range. The results on 3D-rotated real image dataset MIRO further demonstrate that MV-C3D is more adaptable in real-world scenarios. The classification accuracy can be further improved with the increasing number of view images.},
  keywords   = {3D object classification, Convolution, convolutional neural network, Convolutional neural networks, Correlation, deep learning, Kernel, multi-view, Solid modeling, Three-dimensional displays, Two dimensional displays},
  file       = {IEEE Xplore Abstract Record:/home/erago/Zotero/storage/IKV2NZUF/8736713.html:text/html;IEEE Xplore Full Text PDF:/home/erago/Zotero/storage/9538VQQ4/Xuan et al. - 2019 - MV-C3D A Spatial Correlated Multi-View 3D Convolu.pdf:application/pdf},
}
@article{ma_learning_2019,
  author   = {Ma, Chao and Guo, Yulan and Yang, Jungang and An, Wei},
  title    = {Learning {Multi}-{View} {Representation} {With} {LSTM} for 3-{D} {Shape} {Recognition} and {Retrieval}},
  journal  = {IEEE Transactions on Multimedia},
  volume   = {21},
  number   = {5},
  pages    = {1169--1182},
  month    = may,
  year     = {2019},
  issn     = {1520-9210, 1941-0077},
  doi      = {10.1109/TMM.2018.2875512},
  url      = {https://ieeexplore.ieee.org/document/8490588/},
  urldate  = {2022-05-10},
  language = {en},
  abstract = {Shape representation for 3-D models is an important topic in computer vision, multimedia analysis, and computer graphics. Recent multiview-based methods demonstrate promising performance for 3-D shape recognition and retrieval. However, most multiview-based methods ignore the correlations of multiple views or suffer from high computional cost. In this paper, we propose a novel multiview-based network architecture for 3-D shape recognition and retrieval. Our network combines convolutional neural networks (CNNs) with long short-term memory (LSTM) to exploit the correlative information from multiple views. Well-pretrained CNNs with residual connections are first used to extract a low-level feature of each view image rendered from a 3-D shape. Then, a LSTM and a sequence voting layer are employed to aggregate these features into a shape descriptor. The highway network and a three-step training strategy are also adopted to boost the optimization of the deep network. Experimental results on two public datasets demonstrate that the proposed method achieves promising performance for 3-D shape recognition and the state-of-the-art performance for the 3-D shape retrieval.},
  file     = {Ma et al. - 2019 - Learning Multi-View Representation With LSTM for 3.pdf:/home/erago/Zotero/storage/8SCQU9C9/Ma et al. - 2019 - Learning Multi-View Representation With LSTM for 3.pdf:application/pdf},
}
@article{kasaei_orthographicnet_2020,
  author        = {Kasaei, Hamidreza},
  title         = {{OrthographicNet}: {A} {Deep} {Transfer} {Learning} {Approach} for {3D} {Object} {Recognition} in {Open}-{Ended} {Domains}},
  shorttitle    = {{OrthographicNet}},
  journal       = {arXiv:1902.03057 [cs]},
  month         = dec,
  year          = {2020},
  eprint        = {1902.03057},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1902.03057},
  urldate       = {2022-05-12},
  abstract      = {Nowadays, service robots are appearing more and more in our daily life. For this type of robot, open-ended object category learning and recognition is necessary since no matter how extensive the training data used for batch learning, the robot might be faced with a new object when operating in a real-world environment. In this work, we present OrthographicNet, a Convolutional Neural Network (CNN)-based model, for 3D object recognition in open-ended domains. In particular, OrthographicNet generates a global rotation- and scale-invariant representation for a given 3D object, enabling robots to recognize the same or similar objects seen from different perspectives. Experimental results show that our approach yields significant improvements over the previous state-of-the-art approaches concerning object recognition performance and scalability in open-ended scenarios. Moreover, OrthographicNet demonstrates the capability of learning new categories from very few examples on-site. Regarding real-time performance, three real-world demonstrations validate the promising performance of the proposed architecture.},
  keywords      = {Computer Science - Computer Vision and Pattern Recognition, Computer Science - Robotics},
  file          = {arXiv Fulltext PDF:/home/erago/Zotero/storage/4YVS65GP/Kasaei - 2020 - OrthographicNet A Deep Transfer Learning Approach.pdf:application/pdf;arXiv.org Snapshot:/home/erago/Zotero/storage/IJ7UH3HT/1902.html:text/html},
}
@article{bonaventura_survey_2018,
  author   = {Bonaventura, Xavier and Feixas, Miquel and Sbert, Mateu and Chuang, Lewis and Wallraven, Christian},
  title    = {A {Survey} of {Viewpoint} {Selection} {Methods} for {Polygonal} {Models}},
  journal  = {Entropy},
  volume   = {20},
  number   = {5},
  pages    = {370},
  month    = may,
  year     = {2018},
  issn     = {1099-4300},
  doi      = {10.3390/e20050370},
  url      = {http://www.mdpi.com/1099-4300/20/5/370},
  urldate  = {2022-05-14},
  language = {en},
  abstract = {Viewpoint selection has been an emerging area in computer graphics for some years, and it is now getting maturity with applications in fields such as scene navigation, scientific visualization, object recognition, mesh simplification, and camera placement. In this survey, we review and compare twenty-two measures to select good views of a polygonal 3D model, classify them using an extension of the categories defined by Secord et al., and evaluate them against the Dutagaci et al. benchmark. Eleven of these measures have not been reviewed in previous surveys. Three out of the five short-listed best viewpoint measures are directly related to information. We also present in which fields the different viewpoint measures have been applied. Finally, we provide a publicly available framework where all the viewpoint selection measures are implemented and can be compared against each other.},
  file     = {Bonaventura et al. - 2018 - A Survey of Viewpoint Selection Methods for Polygo.pdf:/home/erago/Zotero/storage/LC7Y8LTY/Bonaventura et al. - 2018 - A Survey of Viewpoint Selection Methods for Polygo.pdf:application/pdf},
}