-
Notifications
You must be signed in to change notification settings - Fork 0
/
references.bib
603 lines (548 loc) · 38.1 KB
/
references.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
@inproceedings{su2015multi,
  title     = {Multi-view convolutional neural networks for {3D} shape recognition},
  author    = {Su, Hang and Maji, Subhransu and Kalogerakis, Evangelos and Learned-Miller, Erik},
  booktitle = {Proceedings of the IEEE international conference on computer vision},
  year      = {2015}
}
@article{zoph2016neural,
  title   = {Neural architecture search with reinforcement learning},
  author  = {Zoph, Barret and Le, Quoc V},
  journal = {arXiv preprint arXiv:1611.01578},
  year    = {2016}
}
@inproceedings{wu20153d,
  title     = {{3D} shapenets: A deep representation for volumetric shapes},
  author    = {Wu, Zhirong and Song, Shuran and Khosla, Aditya and Yu, Fisher and Zhang, Linguang and Tang, Xiaoou and Xiao, Jianxiong},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {1912--1920},
  year      = {2015}
}
@inproceedings{kanezaki2018_rotationnet,
  title     = {{RotationNet}: Joint Object Categorization and Pose Estimation Using Multiviews from Unsupervised Viewpoints},
  author    = {Kanezaki, Asako and Matsushita, Yasuyuki and Nishida, Yoshifumi},
  booktitle = {Proc.\ of IEEE International Conference on Computer Vision and Pattern Recognition (CVPR)},
  year      = {2018},
}
@article{brock2016generative,
  title   = {Generative and discriminative voxel modeling with convolutional neural networks},
  author  = {Brock, Andrew and Lim, Theodore and Ritchie, James M and Weston, Nick},
  journal = {arXiv preprint arXiv:1608.04236},
  year    = {2016}
}
@article{munozextending,
  title  = {Extending {GG-CNN} through Automated Model Space Exploration using Knowledge Transfer},
  author = {Mu{\~n}oz, Mario R{\'\i}os and Schomaker, Lambert and Kasaei, S Hamidreza},
  internal-note = {NOTE(review): @article is missing required fields journal and year -- confirm venue/year, or reclassify as a @misc/preprint entry}
}
@inproceedings{morrison2018closing,
  title     = {Closing the Loop for Robotic Grasping: A Real-time, Generative Grasp Synthesis Approach},
  author    = {Morrison, Douglas and Corke, Peter and Leitner, J{\"u}rgen},
  booktitle = {Proc.\ of Robotics: Science and Systems (RSS)},
  year      = {2018}
}
@inproceedings{sift,
  title        = {Object recognition from local scale-invariant features},
  author       = {Lowe, David G},
  booktitle    = {Proceedings of the seventh IEEE international conference on computer vision},
  volume       = {2},
  pages        = {1150--1157},
  year         = {1999},
  organization = {IEEE}
}
@article{canny,
  title     = {A computational approach to edge detection},
  author    = {Canny, John},
  journal   = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  number    = {6},
  pages     = {679--698},
  year      = {1986},
  publisher = {IEEE}
}
@inproceedings{surf,
  title        = {{SURF}: Speeded up robust features},
  author       = {Bay, Herbert and Tuytelaars, Tinne and Van Gool, Luc},
  booktitle    = {European conference on computer vision},
  pages        = {404--417},
  year         = {2006},
  organization = {Springer}
}
@article{vgg,
  title   = {Very deep convolutional networks for large-scale image recognition},
  author  = {Simonyan, Karen and Zisserman, Andrew},
  journal = {arXiv preprint arXiv:1409.1556},
  year    = {2014}
}
@article{mobilenet,
  title   = {{MobileNets}: Efficient convolutional neural networks for mobile vision applications},
  author  = {Howard, Andrew G and Zhu, Menglong and Chen, Bo and Kalenichenko, Dmitry and Wang, Weijun and Weyand, Tobias and Andreetto, Marco and Adam, Hartwig},
  journal = {arXiv preprint arXiv:1704.04861},
  year    = {2017}
}
@inproceedings{resnet,
  title     = {Deep residual learning for image recognition},
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {770--778},
  year      = {2016}
}
@inproceedings{imagenet,
  author    = {Deng, J. and Dong, W. and Socher, R. and Li, L.-J. and Li, K. and Fei-Fei, L.},
  title     = {{ImageNet}: A Large-Scale Hierarchical Image Database},
  booktitle = {CVPR09},
  year      = {2009},
  bibsource = {http://www.image-net.org/papers/imagenet_cvpr09.bib}
}
@inproceedings{voxnet,
  title        = {{VoxNet}: A {3D} convolutional neural network for real-time object recognition},
  author       = {Maturana, Daniel and Scherer, Sebastian},
  booktitle    = {2015 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)},
  year         = {2015},
  organization = {IEEE}
}
@inproceedings{kasaei2018perceiving,
  title     = {Perceiving, learning, and recognizing {3D} objects: An approach to cognitive service robots},
  author    = {Kasaei, S and Sock, Juil and Lopes, Luis Seabra and Tom{\'e}, Ana Maria and Kim, Tae-Kyun},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  year      = {2018},
  internal-note = {NOTE(review): first author given only as "Kasaei, S" -- elsewhere in this file the same author appears as "Kasaei, S Hamidreza"; confirm and normalise}
}
@article{fpnn,
  title   = {{FPNN}: Field probing neural networks for {3D} data},
  author  = {Li, Yangyan and Pirk, Soeren and Su, Hao and Qi, Charles R and Guibas, Leonidas J},
  journal = {arXiv preprint arXiv:1605.06240},
  year    = {2016}
}
@article{3dgan,
  title   = {Learning a probabilistic latent space of object shapes via {3D} generative-adversarial modeling},
  author  = {Wu, Jiajun and Zhang, Chengkai and Xue, Tianfan and Freeman, William T and Tenenbaum, Joshua B},
  journal = {arXiv preprint arXiv:1610.07584},
  year    = {2016}
}
@inproceedings{savarese20073d,
  title        = {{3D} generic object categorization, localization and pose estimation},
  author       = {Savarese, Silvio and Fei-Fei, Li},
  booktitle    = {2007 IEEE 11th International Conference on Computer Vision},
  pages        = {1--8},
  year         = {2007},
  organization = {IEEE}
}
@inproceedings{lai2011scalable,
  title     = {A scalable tree-based approach for joint object and pose recognition},
  author    = {Lai, Kevin and Bo, Liefeng and Ren, Xiaofeng and Fox, Dieter},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  year      = {2011}
}
@inproceedings{Sock_2017_ICCV,
  author    = {Sock, Juil and Hamidreza Kasaei, S. and Seabra Lopes, Luis and Kim, Tae-Kyun},
  title     = {Multi-View {6D} Object Pose Estimation and Camera Motion Planning Using RGBD Images},
  booktitle = {Proceedings of the IEEE International Conference on Computer Vision (ICCV) Workshops},
  year      = {2017}
}
@inproceedings{zhang2013joint,
  title     = {Joint object and pose recognition using homeomorphic manifold analysis},
  author    = {Zhang, Haopeng and El-Gaaly, Tarek and Elgammal, Ahmed and Jiang, Zhiguo},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  year      = {2013}
}
@inproceedings{bakry2014untangling,
  title        = {Untangling object-view manifold for multiview recognition and pose estimation},
  author       = {Bakry, Amr and Elgammal, Ahmed},
  booktitle    = {European conference on computer vision},
  pages        = {434--449},
  year         = {2014},
  organization = {Springer}
}
@inproceedings{qi2016volumetric,
  title     = {Volumetric and multi-view {CNN}s for object classification on {3D} data},
  author    = {Qi, Charles R and Su, Hao and Nie{\ss}ner, Matthias and Dai, Angela and Yan, Mengyuan and Guibas, Leonidas J},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {5648--5656},
  year      = {2016}
}
@inproceedings{elhoseiny2016comparative,
  title        = {A comparative analysis and study of multiview {CNN} models for joint object categorization and pose estimation},
  author       = {Elhoseiny, Mohamed and El-Gaaly, Tarek and Bakry, Amr and Elgammal, Ahmed},
  booktitle    = {International Conference on Machine Learning},
  pages        = {888--897},
  year         = {2016},
  organization = {PMLR}
}
@inproceedings{kuznetsova2016exploiting,
  title     = {Exploiting view-specific appearance similarities across classes for zero-shot pose prediction: A metric learning approach},
  author    = {Kuznetsova, Alina and Hwang, Sung Ju and Rosenhahn, Bodo and Sigal, Leonid},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume    = {30},
  number    = {1},
  year      = {2016}
}
@inproceedings{dutagaci2010benchmark,
  title     = {A benchmark for best view selection of {3D} objects},
  author    = {Dutagaci, Helin and Cheung, Chun Pan and Godil, Afzal},
  booktitle = {Proceedings of the ACM workshop on {3D} object retrieval},
  pages     = {45--50},
  year      = {2010}
}
@article{polonsky2005s,
  title     = {What's in an image?},
  author    = {Polonsky, Oleg and Patan{\'e}, Giuseppe and Biasotti, Silvia and Gotsman, Craig and Spagnuolo, Michela},
  journal   = {The Visual Computer},
  volume    = {21},
  number    = {8},
  pages     = {840--847},
  year      = {2005},
  publisher = {Springer}
}
@inproceedings{vazquez2001viewpoint,
  title        = {Viewpoint selection using viewpoint entropy},
  author       = {V{\'a}zquez, Pere-Pau and Feixas, Miquel and Sbert, Mateu and Heidrich, Wolfgang},
  booktitle    = {VMV},
  volume       = {1},
  pages        = {273--280},
  year         = {2001},
  organization = {Citeseer}
}
@inproceedings{page2003shape,
  title        = {Shape analysis algorithm based on information theory},
  author       = {Page, David L and Koschan, Andreas F and Sukumar, Sreenivas R and Roui-Abidi, Besma and Abidi, Mongi A},
  booktitle    = {Proceedings 2003 International Conference on Image Processing (Cat. No. 03CH37429)},
  volume       = {1},
  pages        = {I--229},
  year         = {2003},
  organization = {IEEE}
}
@incollection{lee2005mesh,
  title     = {Mesh saliency},
  author    = {Lee, Chang Ha and Varshney, Amitabh and Jacobs, David W},
  booktitle = {ACM SIGGRAPH 2005 Papers},
  pages     = {659--666},
  year      = {2005}
}
@article{zhou2018,
  author  = {Zhou, Qian-Yi and Park, Jaesik and Koltun, Vladlen},
  title   = {{Open3D}: {A} Modern Library for {3D} Data Processing},
  journal = {arXiv preprint arXiv:1801.09847},
  year    = {2018},
}
@article{li2017hyperband,
  title     = {Hyperband: A novel bandit-based approach to hyperparameter optimization},
  author    = {Li, Lisha and Jamieson, Kevin and DeSalvo, Giulia and Rostamizadeh, Afshin and Talwalkar, Ameet},
  journal   = {The Journal of Machine Learning Research},
  volume    = {18},
  number    = {1},
  pages     = {6765--6816},
  year      = {2017},
  publisher = {JMLR.org}
}
@inproceedings{sandler2018mobilenetv2,
  title     = {{MobileNetV2}: Inverted residuals and linear bottlenecks},
  author    = {Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {4510--4520},
  year      = {2018}
}
@inproceedings{efficientnet,
  title        = {{EfficientNet}: Rethinking model scaling for convolutional neural networks},
  author       = {Tan, Mingxing and Le, Quoc},
  booktitle    = {International Conference on Machine Learning},
  pages        = {6105--6114},
  year         = {2019},
  organization = {PMLR}
}
@article{adam,
  title   = {Adam: A method for stochastic optimization},
  author  = {Kingma, Diederik P and Ba, Jimmy},
  journal = {arXiv preprint arXiv:1412.6980},
  year    = {2014}
}
@inproceedings{pepik20153d,
  title     = {{3D} object class detection in the wild},
  author    = {Pepik, Bojan and Stark, Michael and Gehler, Peter and Ritschel, Tobias and Schiele, Bernt},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops},
  pages     = {1--10},
  year      = {2015}
}
@article{teney2014multiview,
  title     = {Multiview feature distributions for object detection and continuous pose estimation},
  author    = {Teney, Damien and Piater, Justus},
  journal   = {Computer Vision and Image Understanding},
  volume    = {125},
  pages     = {265--282},
  year      = {2014},
  publisher = {Elsevier}
}
@inproceedings{liu2019densepoint,
  title     = {{DensePoint}: Learning densely contextual representation for efficient point cloud processing},
  author    = {Liu, Yongcheng and Fan, Bin and Meng, Gaofeng and Lu, Jiwen and Xiang, Shiming and Pan, Chunhong},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision},
  pages     = {5239--5248},
  year      = {2019}
}
@article{micikevicius2017mixed,
  title   = {Mixed precision training},
  author  = {Micikevicius, Paulius and Narang, Sharan and Alben, Jonah and Diamos, Gregory and Elsen, Erich and Garcia, David and Ginsburg, Boris and Houston, Michael and Kuchaiev, Oleksii and Venkatesh, Ganesh and others},
  journal = {arXiv preprint arXiv:1710.03740},
  year    = {2017}
}
@inproceedings{kumawat2019lp,
  title     = {{LP-3DCNN}: Unveiling local phase in {3D} convolutional neural networks},
  author    = {Kumawat, Sudhakar and Raman, Shanmuganathan},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages     = {4903--4912},
  year      = {2019},
  internal-note = {Duplicate of entry kumawat_lp-3dcnn_2019 in this file -- consolidate to a single key}
}
@misc{liu2019relationshape,
  title         = {Relation-Shape Convolutional Neural Network for Point Cloud Analysis},
  author        = {Liu, Yongcheng and Fan, Bin and Xiang, Shiming and Pan, Chunhong},
  year          = {2019},
  eprint        = {1904.07601},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
  internal-note = {Duplicate of entry liu_relation-shape_2019 in this file -- consolidate to a single key}
}
@inproceedings{jiang2019mlvcnn,
  title     = {{MLVCNN}: Multi-loop-view convolutional neural network for {3D} shape retrieval},
  author    = {Jiang, Jianwen and Bao, Di and Chen, Ziqiang and Zhao, Xibin and Gao, Yue},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume    = {33},
  number    = {01},
  pages     = {8513--8520},
  year      = {2019},
  internal-note = {Duplicate of entry jiang_mlvcnn_2019 in this file -- consolidate to a single key}
}
@article{zhang2018inductive,
  title     = {Inductive multi-hypergraph learning and its application on view-based {3D} object classification},
  author    = {Zhang, Zizhao and Lin, Haojie and Zhao, Xibin and Ji, Rongrong and Gao, Yue},
  journal   = {IEEE Transactions on Image Processing},
  volume    = {27},
  number    = {12},
  pages     = {5957--5968},
  year      = {2018},
  publisher = {IEEE},
  internal-note = {Duplicate of entry zhang_inductive_2018 in this file -- consolidate to a single key}
}
@article{thrun2002probabilistic,
  title     = {Probabilistic robotics},
  author    = {Thrun, Sebastian},
  journal   = {Communications of the ACM},
  volume    = {45},
  number    = {3},
  pages     = {52--57},
  year      = {2002},
  publisher = {ACM New York, NY, USA}
}
@article{kumawat_lp-3dcnn_2019,
title = {{LP}-{3DCNN}: {Unveiling} {Local} {Phase} in {3D} {Convolutional} {Neural} {Networks}},
shorttitle = {{LP}-{3DCNN}},
url = {http://arxiv.org/abs/1904.03498},
abstract = {Traditional 3D Convolutional Neural Networks (CNNs) are computationally expensive, memory intensive, prone to overfit, and most importantly, there is a need to improve their feature learning capabilities. To address these issues, we propose Rectified Local Phase Volume (ReLPV) block, an efficient alternative to the standard 3D convolutional layer. The ReLPV block extracts the phase in a 3D local neighborhood (e.g., 3 × 3 × 3) of each position of the input map to obtain the feature maps. The phase is extracted by computing 3D Short Term Fourier Transform (STFT) at multiple fixed low frequency points in the 3D local neighborhood of each position. These feature maps at different frequency points are then linearly combined after passing them through an activation function. The ReLPV block provides significant parameter savings of at least, 33 to 133 times compared to the standard 3D convolutional layer with the filter sizes 3 × 3 × 3 to 13 × 13 × 13, respectively. We show that the feature learning capabilities of the ReLPV block are significantly better than the standard 3D convolutional layer. Furthermore, it produces consistently better results across different 3D data representations. We achieve state-of-the-art accuracy on the volumetric ModelNet10 and ModelNet40 datasets while utilizing only 11\% parameters of the current state-of-theart. We also improve the state-of-the-art on the UCF-101 split-1 action recognition dataset by 5.68\% (when trained from scratch) while using only 15\% of the parameters of the state-of-the-art. The project webpage is available at https://sites.google.com/view/lp-3dcnn/home.},
language = {en},
urldate = {2022-05-10},
journal = {arXiv:1904.03498 [cs]},
author = {Kumawat, Sudhakar and Raman, Shanmuganathan},
month = apr,
year = {2019},
note = {arXiv: 1904.03498},
keywords = {Computer Science - Computer Vision and Pattern Recognition},
file = {Kumawat and Raman - 2019 - LP-3DCNN Unveiling Local Phase in 3D Convolutiona.pdf:/home/erago/Zotero/storage/T8PNMNJI/Kumawat and Raman - 2019 - LP-3DCNN Unveiling Local Phase in 3D Convolutiona.pdf:application/pdf},
internal-note = {NOTE(review): duplicate of entry kumawat2019lp in this file; abstract contains Unicode characters (×) that classic 8-bit BibTeX may mishandle -- confirm Biber/biblatex is the processor},
}
@article{zhang_inductive_2018,
title = {Inductive {Multi}-{Hypergraph} {Learning} and {Its} {Application} on {View}-{Based} {3D} {Object} {Classification}},
volume = {27},
issn = {1057-7149, 1941-0042},
url = {https://ieeexplore.ieee.org/document/8424480/},
doi = {10.1109/TIP.2018.2862625},
abstract = {The wide 3D applications have led to increasing amount of 3D object data, and thus effective 3D object classification technique has become urgent requirement. One important and challenging task for 3D object classification is how to formulate the 3D data correlation and exploit it. Most of previous works focus on learning optimal pairwise distance metric for object comparison, which may lose the global correlation among 3D objects. Recently, transductive hypergraph learning has been investigated for classification, which can jointly explore the correlation among multiple objects, including both the labeled and unlabeled data. Although these methods have shown better performance, they are still limited due to 1) a considerable amount of testing data may not be available in practice and 2) the high computational cost to test new coming data. To handle this problem, considering the multimodal representations of 3D objects in practice, we propose an inductive multi-hypergraph learning algorithm, which targets on learning an optimal projection for the multi-modal training data. In this method, all the training data are formulated in multihypergraph based on the features, and the inductive learning is conducted to learn the projection matrices and the optimal multi-hypergraph combination weights simultaneously. Different from the transductive learning on hypergraph, the high cost training process is off-line, and the testing process is very efficient for the inductive learning on hypergraph. We have conducted experiments on two 3D benchmarks, i.e., the NTU and the ModelNet40 datasets, and compared the proposed algorithm with the state-of-the-art methods and traditional transductive multi-hypergraph learning methods. Experimental results have demonstrated that the proposed method can achieve effective and efficient classification performance. 
We also note that the proposed method is a general framework and has the potential to be applied in other applications in practice.},
language = {en},
number = {12},
urldate = {2022-05-10},
journal = {IEEE Transactions on Image Processing},
author = {Zhang, Zizhao and Lin, Haojie and Zhao, Xibin and Ji, Rongrong and Gao, Yue},
month = dec,
year = {2018},
pages = {5957--5968},
file = {inductive-multihypergraph-learning-and-its-application-on-viewba-2018.pdf:/home/erago/Zotero/storage/T8PNMNJI/inductive-multihypergraph-learning-and-its-application-on-viewba-2018.pdf:application/pdf},
internal-note = {NOTE(review): duplicate of entry zhang2018inductive in this file -- consolidate to a single key},
}
@article{liu_relation-shape_2019,
title = {Relation-{Shape} {Convolutional} {Neural} {Network} for {Point} {Cloud} {Analysis}},
url = {http://arxiv.org/abs/1904.07601},
abstract = {Point cloud analysis is very challenging, as the shape implied in irregular points is difficult to capture. In this paper, we propose RS-CNN, namely, Relation-Shape Convolutional Neural Network, which extends regular grid CNN to irregular configuration for point cloud analysis. The key to RS-CNN is learning from relation, i.e., the geometric topology constraint among points. Specifically, the convolutional weight for local point set is forced to learn a high-level relation expression from predefined geometric priors, between a sampled point from this point set and the others. In this way, an inductive local representation with explicit reasoning about the spatial layout of points can be obtained, which leads to much shape awareness and robustness. With this convolution as a basic operator, RS-CNN, a hierarchical architecture can be developed to achieve contextual shape-aware learning for point cloud analysis. Extensive experiments on challenging benchmarks across three tasks verify RS-CNN achieves the state of the arts.},
urldate = {2022-05-10},
journal = {arXiv:1904.07601 [cs]},
author = {Liu, Yongcheng and Fan, Bin and Xiang, Shiming and Pan, Chunhong},
month = may,
year = {2019},
note = {arXiv: 1904.07601},
keywords = {Computer Science - Computer Vision and Pattern Recognition, Computer Science - Artificial Intelligence, Computer Science - Computational Geometry, Computer Science - Graphics, Computer Science - Robotics},
file = {arXiv Fulltext PDF:/home/erago/Zotero/storage/MNFJHGTQ/Liu et al. - 2019 - Relation-Shape Convolutional Neural Network for Po.pdf:application/pdf;arXiv.org Snapshot:/home/erago/Zotero/storage/2M4C8Z3Y/1904.html:text/html},
internal-note = {NOTE(review): duplicate of entry liu2019relationshape in this file -- consolidate to a single key},
}
@article{zhang_linked_2019,
title = {Linked {Dynamic} {Graph} {CNN}: {Learning} on {Point} {Cloud} via {Linking} {Hierarchical} {Features}},
shorttitle = {Linked {Dynamic} {Graph} {CNN}},
url = {http://arxiv.org/abs/1904.10014},
abstract = {Learning on point cloud is eagerly in demand because the point cloud is a common type of geometric data and can aid robots to understand environments robustly. However, the point cloud is sparse, unstructured, and unordered, which cannot be recognized accurately by a traditional convolutional neural network (CNN) nor a recurrent neural network (RNN). Fortunately, a graph convolutional neural network (Graph CNN) can process sparse and unordered data. Hence, we propose a linked dynamic graph CNN (LDGCNN) to classify and segment point cloud directly in this paper. We remove the transformation network, link hierarchical features from dynamic graphs, freeze feature extractor, and retrain the classifier to increase the performance of LDGCNN. We explain our network using theoretical analysis and visualization. Through experiments, we show that the proposed LDGCNN achieves state-of-art performance on two standard datasets: ModelNet40 and ShapeNet.},
urldate = {2022-05-10},
journal = {arXiv:1904.10014 [cs]},
author = {Zhang, Kuangen and Hao, Ming and Wang, Jing and de Silva, Clarence W. and Fu, Chenglong},
month = aug,
year = {2019},
note = {arXiv: 1904.10014},
keywords = {Computer Science - Computer Vision and Pattern Recognition},
file = {arXiv Fulltext PDF:/home/erago/Zotero/storage/VE87R4GD/Zhang et al. - 2019 - Linked Dynamic Graph CNN Learning on Point Cloud .pdf:application/pdf;arXiv.org Snapshot:/home/erago/Zotero/storage/XBNQ44BK/1904.html:text/html},
internal-note = {NOTE(review): Zotero export; file field references a machine-local path -- ignored by styles, but not portable},
}
@inproceedings{khan_unsupervised_2019,
address = {Long Beach, CA, USA},
title = {Unsupervised {Primitive} {Discovery} for {Improved} {3D} {Generative} {Modeling}},
isbn = {978-1-72813-293-8},
url = {https://ieeexplore.ieee.org/document/8954393/},
doi = {10.1109/CVPR.2019.00997},
language = {en},
urldate = {2022-05-10},
booktitle = {2019 {IEEE}/{CVF} {Conference} on {Computer} {Vision} and {Pattern Recognition} (CVPR)},
publisher = {IEEE},
author = {Khan, Salman H. and Guo, Yulan and Hayat, Munawar and Barnes, Nick},
month = jun,
year = {2019},
pages = {9731--9740},
file = {Khan et al. - 2019 - Unsupervised Primitive Discovery for Improved 3D G.pdf:/home/erago/Zotero/storage/WTJN3BVS/Khan et al. - 2019 - Unsupervised Primitive Discovery for Improved 3D G.pdf:application/pdf},
}
@article{jiang_mlvcnn_2019,
title = {{MLVCNN}: {Multi}-{Loop}-{View} {Convolutional} {Neural} {Network} for {3D} {Shape} {Retrieval}},
volume = {33},
issn = {2374-3468, 2159-5399},
shorttitle = {{MLVCNN}},
url = {http://aaai.org/ojs/index.php/AAAI/article/view/4869},
doi = {10.1609/aaai.v33i01.33018513},
abstract = {3D shape retrieval has attracted much attention and become a hot topic in computer vision field recently.With the development of deep learning, 3D shape retrieval has also made great progress and many view-based methods have been introduced in recent years. However, how to represent 3D shapes better is still a challenging problem. At the same time, the intrinsic hierarchical associations among views still have not been well utilized. In order to tackle these problems, in this paper, we propose a multi-loop-view convolutional neural network (MLVCNN) framework for 3D shape retrieval. In this method, multiple groups of views are extracted from different loop directions first. Given these multiple loop views, the proposed MLVCNN framework introduces a hierarchical view-loop-shape architecture, i.e., the view level, the loop level, and the shape level, to conduct 3D shape representation from different scales. In the view-level, a convolutional neural network is first trained to extract view features. Then, the proposed Loop Normalization and LSTM are utilized for each loop of view to generate the loop-level features, which considering the intrinsic associations of the different views in the same loop. Finally, all the loop-level descriptors are combined into a shape-level descriptor for 3D shape representation, which is used for 3D shape retrieval. Our proposed method has been evaluated on the public 3D shape benchmark, i.e., ModelNet40. Experiments and comparisons with the state-of-the-art methods show that the proposed MLVCNN method can achieve significant performance improvement on 3D shape retrieval tasks. Our MLVCNN outperforms the state-of-the-art methods by the mAP of 4.84\% in 3D shape retrieval task. We have also evaluated the performance of the proposed method on the 3D shape classification task where MLVCNN also achieves superior performance compared with recent methods.},
language = {en},
urldate = {2022-05-10},
journal = {Proceedings of the AAAI Conference on Artificial Intelligence},
author = {Jiang, Jianwen and Bao, Di and Chen, Ziqiang and Zhao, Xibin and Gao, Yue},
month = jul,
year = {2019},
pages = {8513--8520},
file = {Jiang et al. - 2019 - MLVCNN Multi-Loop-View Convolutional Neural Netwo.pdf:/home/erago/Zotero/storage/HFBXJNGW/Jiang et al. - 2019 - MLVCNN Multi-Loop-View Convolutional Neural Netwo.pdf:application/pdf},
internal-note = {NOTE(review): duplicate of entry jiang2019mlvcnn in this file -- consolidate to a single key},
}
@article{han_3d2seqviews_2019,
title = {{3D2SeqViews}: {Aggregating} {Sequential} {Views} for {3D} {Global} {Feature} {Learning} by {CNN} {With} {Hierarchical} {Attention} {Aggregation}},
volume = {28},
issn = {1057-7149, 1941-0042},
shorttitle = {{3D2SeqViews}},
url = {https://ieeexplore.ieee.org/document/8666059/},
doi = {10.1109/TIP.2019.2904460},
language = {en},
number = {8},
urldate = {2022-05-10},
journal = {IEEE Transactions on Image Processing},
author = {Han, Zhizhong and Lu, Honglei and Liu, Zhenbao and Vong, Chi-Man and Liu, Yu-Shen and Zwicker, Matthias and Han, Junwei and Chen, C. L. Philip},
month = aug,
year = {2019},
pages = {3986--3999},
file = {Han et al. - 2019 - 3D2SeqViews Aggregating Sequential Views for 3D G.pdf:/home/erago/Zotero/storage/LMY7ERRG/Han et al. - 2019 - 3D2SeqViews Aggregating Sequential Views for 3D G.pdf:application/pdf},
internal-note = {NOTE(review): Zotero export; file field references a machine-local path -- ignored by styles, but not portable},
}
@inproceedings{cheraghian_3dcapsule_2019,
address = {Waikoloa Village, HI, USA},
title = {{3DCapsule}: {Extending} the {Capsule} {Architecture} to {Classify} {3D} {Point} {Clouds}},
isbn = {978-1-72811-975-5},
shorttitle = {{3DCapsule}},
url = {https://ieeexplore.ieee.org/document/8658405/},
doi = {10.1109/WACV.2019.00132},
abstract = {This paper introduces the 3DCapsule, which is a 3D extension of the recently introduced Capsule concept that makes it applicable to unordered point sets. The original Capsule relies on the existence of a spatial relationship between the elements in the feature map it is presented with, whereas in point permutation invariant formulations of 3D point set classification methods, such relationships are typically lost. Here, a new layer called ComposeCaps is introduced that, in lieu of a spatially relevant feature mapping, learns a new mapping that can be exploited by the 3DCapsule. Previous works in the 3D point set classification domain have focused on other parts of the architecture, whereas instead, the 3DCapsule is a drop-in replacement of the commonly used fully connected classifier. It is demonstrated via an ablation study, that when the 3DCapsule is applied to recent 3D point set classification architectures, it consistently shows an improvement, in particular when subjected to noisy data. Similarly, the ComposeCaps layer is evaluated and demonstrates an improvement over the baseline. In an apples-to-apples comparison against state-ofthe-art methods, again, better performance is demonstrated by the 3DCapsule.},
language = {en},
urldate = {2022-05-10},
booktitle = {2019 {IEEE} {Winter} {Conference} on {Applications} of {Computer} {Vision} ({WACV})},
publisher = {IEEE},
author = {Cheraghian, Ali and Petersson, Lars},
month = jan,
year = {2019},
pages = {1194--1202},
file = {Cheraghian and Petersson - 2019 - 3DCapsule Extending the Capsule Architecture to C.pdf:/home/erago/Zotero/storage/4H6GV4V8/Cheraghian and Petersson - 2019 - 3DCapsule Extending the Capsule Architecture to C.pdf:application/pdf},
internal-note = {NOTE(review): Zotero export; file field references a machine-local path -- ignored by styles, but not portable},
}
@article{kanezaki_rotationnet_2021,
  author   = {Kanezaki, Asako and Matsushita, Yasuyuki and Nishida, Yoshifumi},
  title    = {{RotationNet} for {Joint} {Object} {Categorization} and {Unsupervised} {Pose} {Estimation} from {Multi}-{View} {Images}},
  journal  = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  volume   = {43},
  number   = {1},
  pages    = {269--283},
  month    = jan,
  year     = {2021},
  issn     = {0162-8828, 2160-9292, 1939-3539},
  doi      = {10.1109/TPAMI.2019.2922640},
  url      = {https://ieeexplore.ieee.org/document/8736864/},
  urldate  = {2022-05-10},
  language = {en},
  abstract = {We propose a Convolutional Neural Network (CNN)-based model “RotationNet,” which takes multi-view images of an object as input and jointly estimates its pose and object category. Unlike previous approaches that use known viewpoint labels for training, our method treats the viewpoint labels as latent variables, which are learned in an unsupervised manner during the training using an unaligned object dataset. RotationNet uses only a partial set of multi-view images for inference, and this property makes it useful in practical scenarios where only partial views are available. Moreover, our pose alignment strategy enables one to obtain view-specific feature representations shared across classes, which is important to maintain high accuracy in both object categorization and pose estimation. Effectiveness of RotationNet is demonstrated by its superior performance to the state-of-the-art methods of 3D object classification on 10- and 40-class ModelNet datasets. We also show that RotationNet, even trained without known poses, achieves comparable performance to the state-of-the-art methods on an object pose estimation dataset. Furthermore, our object ranking method based on classification by RotationNet achieved the first prize in two tracks of the 3D Shape Retrieval Contest (SHREC) 2017. Finally, we demonstrate the performance of real-world applications of RotationNet trained with our newly created multi-view image dataset using a moving USB camera.},
  file     = {Kanezaki et al. - 2021 - RotationNet for Joint Object Categorization and Un.pdf:/home/erago/Zotero/storage/JY98IMEX/Kanezaki et al. - 2021 - RotationNet for Joint Object Categorization and Un.pdf:application/pdf},
}
@article{wang_normalnet_2019,
  author     = {Wang, Cheng and Cheng, Ming and Sohel, Ferdous and Bennamoun, Mohammed and Li, Jonathan},
  title      = {{NormalNet}: {A} voxel-based {CNN} for {3D} object classification and retrieval},
  shorttitle = {{NormalNet}},
  journal    = {Neurocomputing},
  volume     = {323},
  pages      = {139--147},
  month      = jan,
  year       = {2019},
  issn       = {0925-2312},
  doi        = {10.1016/j.neucom.2018.09.075},
  url        = {https://linkinghub.elsevier.com/retrieve/pii/S0925231218311561},
  urldate    = {2022-05-10},
  language   = {en},
  file       = {Wang et al. - 2019 - NormalNet A voxel-based CNN for 3D object classif.pdf:/home/erago/Zotero/storage/WM22HW55/Wang et al. - 2019 - NormalNet A voxel-based CNN for 3D object classif.pdf:application/pdf},
}
@article{xuan_mv-c3d_2019,
  author     = {Xuan, Qi and Li, Fuxian and Liu, Yi and Xiang, Yun},
  title      = {{MV}-{C3D}: {A} {Spatial} {Correlated} {Multi}-{View} {3D} {Convolutional} {Neural} {Networks}},
  shorttitle = {{MV}-{C3D}},
  journal    = {IEEE Access},
  volume     = {7},
  pages      = {92528--92538},
  year       = {2019},
  issn       = {2169-3536},
  doi        = {10.1109/ACCESS.2019.2923022},
  abstract   = {As the development of deep neural networks, 3D object recognition is becoming increasingly popular in the computer vision community. Many multi-view-based methods are proposed to improve the category recognition accuracy. These approaches mainly rely on multi-view images that are rendered with the whole circumference. In real-world applications, however, 3D objects are mostly observed from partial viewpoints in a less range. Therefore, we propose a multi-view-based 3D convolutional neural network that takes only part of contiguous multi-view images as input and can still maintain high accuracy. Moreover, our model takes these view images as a joint variable to better learn the spatially correlated features using 3D convolution and 3D max-pooling layers. The experimental results on ModelNet10 and ModelNet40 datasets show that our MV-C3D technique can achieve outstanding performance with multi-view images that are captured from partial angles with less range. The results on 3D-rotated real image dataset MIRO further demonstrate that MV-C3D is more adaptable in real-world scenarios. The classification accuracy can be further improved with the increasing number of view images.},
  keywords   = {3D object classification, Convolution, convolutional neural network, Convolutional neural networks, Correlation, deep learning, Kernel, multi-view, Solid modeling, Three-dimensional displays, Two dimensional displays},
  file       = {IEEE Xplore Abstract Record:/home/erago/Zotero/storage/IKV2NZUF/8736713.html:text/html;IEEE Xplore Full Text PDF:/home/erago/Zotero/storage/9538VQQ4/Xuan et al. - 2019 - MV-C3D A Spatial Correlated Multi-View 3D Convolu.pdf:application/pdf},
}
@article{ma_learning_2019,
  author   = {Ma, Chao and Guo, Yulan and Yang, Jungang and An, Wei},
  title    = {Learning {Multi}-{View} {Representation} {With} {LSTM} for 3-{D} {Shape} {Recognition} and {Retrieval}},
  journal  = {IEEE Transactions on Multimedia},
  volume   = {21},
  number   = {5},
  pages    = {1169--1182},
  month    = may,
  year     = {2019},
  issn     = {1520-9210, 1941-0077},
  doi      = {10.1109/TMM.2018.2875512},
  url      = {https://ieeexplore.ieee.org/document/8490588/},
  urldate  = {2022-05-10},
  language = {en},
  abstract = {Shape representation for 3-D models is an important topic in computer vision, multimedia analysis, and computer graphics. Recent multiview-based methods demonstrate promising performance for 3-D shape recognition and retrieval. However, most multiview-based methods ignore the correlations of multiple views or suffer from high computional cost. In this paper, we propose a novel multiview-based network architecture for 3-D shape recognition and retrieval. Our network combines convolutional neural networks (CNNs) with long short-term memory (LSTM) to exploit the correlative information from multiple views. Well-pretrained CNNs with residual connections are first used to extract a low-level feature of each view image rendered from a 3-D shape. Then, a LSTM and a sequence voting layer are employed to aggregate these features into a shape descriptor. The highway network and a three-step training strategy are also adopted to boost the optimization of the deep network. Experimental results on two public datasets demonstrate that the proposed method achieves promising performance for 3-D shape recognition and the state-of-the-art performance for the 3-D shape retrieval.},
  file     = {Ma et al. - 2019 - Learning Multi-View Representation With LSTM for 3.pdf:/home/erago/Zotero/storage/8SCQU9C9/Ma et al. - 2019 - Learning Multi-View Representation With LSTM for 3.pdf:application/pdf},
}
@article{kasaei_orthographicnet_2020,
  author        = {Kasaei, Hamidreza},
  title         = {{OrthographicNet}: {A} {Deep} {Transfer} {Learning} {Approach} for {3D} {Object} {Recognition} in {Open}-{Ended} {Domains}},
  shorttitle    = {{OrthographicNet}},
  journal       = {arXiv:1902.03057 [cs]},
  month         = dec,
  year          = {2020},
  eprint        = {1902.03057},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/1902.03057},
  urldate       = {2022-05-12},
  abstract      = {Nowadays, service robots are appearing more and more in our daily life. For this type of robot, open-ended object category learning and recognition is necessary since no matter how extensive the training data used for batch learning, the robot might be faced with a new object when operating in a real-world environment. In this work, we present OrthographicNet, a Convolutional Neural Network (CNN)-based model, for 3D object recognition in open-ended domains. In particular, OrthographicNet generates a global rotation- and scale-invariant representation for a given 3D object, enabling robots to recognize the same or similar objects seen from different perspectives. Experimental results show that our approach yields significant improvements over the previous state-of-the-art approaches concerning object recognition performance and scalability in open-ended scenarios. Moreover, OrthographicNet demonstrates the capability of learning new categories from very few examples on-site. Regarding real-time performance, three real-world demonstrations validate the promising performance of the proposed architecture.},
  keywords      = {Computer Science - Computer Vision and Pattern Recognition, Computer Science - Robotics},
  file          = {arXiv Fulltext PDF:/home/erago/Zotero/storage/4YVS65GP/Kasaei - 2020 - OrthographicNet A Deep Transfer Learning Approach.pdf:application/pdf;arXiv.org Snapshot:/home/erago/Zotero/storage/IJ7UH3HT/1902.html:text/html},
}
@article{bonaventura_survey_2018,
  author   = {Bonaventura, Xavier and Feixas, Miquel and Sbert, Mateu and Chuang, Lewis and Wallraven, Christian},
  title    = {A {Survey} of {Viewpoint} {Selection} {Methods} for {Polygonal} {Models}},
  journal  = {Entropy},
  volume   = {20},
  number   = {5},
  pages    = {370},
  month    = may,
  year     = {2018},
  issn     = {1099-4300},
  doi      = {10.3390/e20050370},
  url      = {http://www.mdpi.com/1099-4300/20/5/370},
  urldate  = {2022-05-14},
  language = {en},
  abstract = {Viewpoint selection has been an emerging area in computer graphics for some years, and it is now getting maturity with applications in fields such as scene navigation, scientific visualization, object recognition, mesh simplification, and camera placement. In this survey, we review and compare twenty-two measures to select good views of a polygonal 3D model, classify them using an extension of the categories defined by Secord et al., and evaluate them against the Dutagaci et al. benchmark. Eleven of these measures have not been reviewed in previous surveys. Three out of the five short-listed best viewpoint measures are directly related to information. We also present in which fields the different viewpoint measures have been applied. Finally, we provide a publicly available framework where all the viewpoint selection measures are implemented and can be compared against each other.},
  file     = {Bonaventura et al. - 2018 - A Survey of Viewpoint Selection Methods for Polygo.pdf:/home/erago/Zotero/storage/LC7Y8LTY/Bonaventura et al. - 2018 - A Survey of Viewpoint Selection Methods for Polygo.pdf:application/pdf},
}