align -> MRPT_MAX_ALIGN_BYTES

MRPT · Mar 15, 2018 · f91a54b · f91a54b
1 parent f4f3c8b
commit f91a54b
Show file tree

Hide file tree

Showing 17 changed files with 106 additions and 133 deletions.
diff --git a/apps/mrpt-performance/perf-matrix2.cpp b/apps/mrpt-performance/perf-matrix2.cpp
@@ -64,8 +64,8 @@ double matrix_test_chol_fix(int a1, int a2)
 {
 	CMatrixFixedNumeric<T, DIM1, DIM1> A =
 		getRandomGenerator()
-			.drawDefinitePositiveMatrix<CMatrixFixedNumeric<T, DIM1, DIM1>,
-										Eigen::MatrixXd>(DIM1, 0.2);
+			.drawDefinitePositiveMatrix<
+				CMatrixFixedNumeric<T, DIM1, DIM1>, Eigen::MatrixXd>(DIM1, 0.2);
 	CMatrixFixedNumeric<T, DIM1, DIM1> chol_U;
 
 	const long N = 100;
@@ -126,8 +126,8 @@ double matrix_test_chol_Nx6x6_sparse(int DIM, int a2)
 
 double matrix_test_loadFromArray(int N, int a2)
 {
-	alignas(16) double nums[4 * 4] = {0, 1, 2,  3,  4,  5,  6,  7,
-									  8, 9, 10, 11, 12, 13, 14, 15};
+	alignas(MRPT_MAX_ALIGN_BYTES) double nums[4 * 4] = {
+		0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
 
 	CMatrixFixedNumeric<double, 4, 4> M;
 
@@ -144,7 +144,7 @@ double matrix_test_loadWithEigenMap(int N, int a2)
 	CMatrixFixedNumeric<double, 4, 4> M;
 
 	CTicTac tictac;
-	M = Eigen::Map<CMatrixFixedNumeric<double, 4, 4>::Base, Eigen::Aligned>(
+	M = Eigen::Map<CMatrixFixedNumeric<double, 4, 4>::Base, Eigen::Aligned16>(
 		nums);
 	const double t = tictac.Tac();
 	dummy_do_nothing_with_string(mrpt::format("%e", M(0, 0)));
@@ -156,84 +156,57 @@ double matrix_test_loadWithEigenMap(int N, int a2)
 // ------------------------------------------------------
 void register_tests_matrices2()
 {
-	lstTests.push_back(
-		TestData(
-			"matrix: chol, dyn[double] 4x4", matrix_test_chol_dyn<double, 4>));
-	lstTests.push_back(
-		TestData(
-			"matrix: chol, fix[double] 4x4", matrix_test_chol_fix<double, 4>));
-	lstTests.push_back(
-		TestData(
-			"matrix: chol, dyn[double] 40x40",
-			matrix_test_chol_dyn<double, 40>));
-	lstTests.push_back(
-		TestData(
-			"matrix: chol, fix[double] 40x40",
-			matrix_test_chol_fix<double, 40>));
-
-	lstTests.push_back(
-		TestData(
-			"matrix: chol, sparse [2x2;2x2]", matrix_test_chol_sparse<2, 2>));
-	lstTests.push_back(
-		TestData(
-			"matrix: chol, sparse [30x30;10x10]",
-			matrix_test_chol_sparse<30, 10>));
-
-	lstTests.push_back(
-		TestData(
-			"matrix: chol, dyn[double] 10x[6x6]", matrix_test_chol_Nx6x6_dyn,
-			10));
-	lstTests.push_back(
-		TestData(
-			"matrix: chol, sparse      10x[6x6]", matrix_test_chol_Nx6x6_sparse,
-			10));
-	lstTests.push_back(
-		TestData(
-			"matrix: chol, dyn[double] 20x[6x6]", matrix_test_chol_Nx6x6_dyn,
-			20));
-	lstTests.push_back(
-		TestData(
-			"matrix: chol, sparse      20x[6x6]", matrix_test_chol_Nx6x6_sparse,
-			20));
-	lstTests.push_back(
-		TestData(
-			"matrix: chol, dyn[double] 50x[6x6]", matrix_test_chol_Nx6x6_dyn,
-			50));
-	lstTests.push_back(
-		TestData(
-			"matrix: chol, sparse      50x[6x6]", matrix_test_chol_Nx6x6_sparse,
-			50));
-	lstTests.push_back(
-		TestData(
-			"matrix: chol, dyn[double] 100x[6x6]", matrix_test_chol_Nx6x6_dyn,
-			100, 2));
-	lstTests.push_back(
-		TestData(
-			"matrix: chol, sparse      100x[6x6]",
-			matrix_test_chol_Nx6x6_sparse, 100));
-	lstTests.push_back(
-		TestData(
-			"matrix: chol, dyn[double] 120x[6x6]", matrix_test_chol_Nx6x6_dyn,
-			120, 2));
-	lstTests.push_back(
-		TestData(
-			"matrix: chol, sparse      120x[6x6]",
-			matrix_test_chol_Nx6x6_sparse, 120));
-	lstTests.push_back(
-		TestData(
-			"matrix: chol, dyn[double] 140x[6x6]", matrix_test_chol_Nx6x6_dyn,
-			140, 2));
-	lstTests.push_back(
-		TestData(
-			"matrix: chol, sparse      140x[6x6]",
-			matrix_test_chol_Nx6x6_sparse, 140));
-
-	lstTests.push_back(
-		TestData(
-			"matrix: loadFromArray[double] 4x4", matrix_test_loadFromArray,
-			1e7));
-	lstTests.push_back(
-		TestData(
-			"matrix: load Eigen::Map[double] 4x4", matrix_test_loadWithEigenMap,
-			1e7));
+	lstTests.push_back(TestData(
+		"matrix: chol, dyn[double] 4x4", matrix_test_chol_dyn<double, 4>));
+	lstTests.push_back(TestData(
+		"matrix: chol, fix[double] 4x4", matrix_test_chol_fix<double, 4>));
+	lstTests.push_back(TestData(
+		"matrix: chol, dyn[double] 40x40", matrix_test_chol_dyn<double, 40>));
+	lstTests.push_back(TestData(
+		"matrix: chol, fix[double] 40x40", matrix_test_chol_fix<double, 40>));
+
+	lstTests.push_back(TestData(
+		"matrix: chol, sparse [2x2;2x2]", matrix_test_chol_sparse<2, 2>));
+	lstTests.push_back(TestData(
+		"matrix: chol, sparse [30x30;10x10]", matrix_test_chol_sparse<30, 10>));
+
+	lstTests.push_back(TestData(
+		"matrix: chol, dyn[double] 10x[6x6]", matrix_test_chol_Nx6x6_dyn, 10));
+	lstTests.push_back(TestData(
+		"matrix: chol, sparse      10x[6x6]", matrix_test_chol_Nx6x6_sparse,
+		10));
+	lstTests.push_back(TestData(
+		"matrix: chol, dyn[double] 20x[6x6]", matrix_test_chol_Nx6x6_dyn, 20));
+	lstTests.push_back(TestData(
+		"matrix: chol, sparse      20x[6x6]", matrix_test_chol_Nx6x6_sparse,
+		20));
+	lstTests.push_back(TestData(
+		"matrix: chol, dyn[double] 50x[6x6]", matrix_test_chol_Nx6x6_dyn, 50));
+	lstTests.push_back(TestData(
+		"matrix: chol, sparse      50x[6x6]", matrix_test_chol_Nx6x6_sparse,
+		50));
+	lstTests.push_back(TestData(
+		"matrix: chol, dyn[double] 100x[6x6]", matrix_test_chol_Nx6x6_dyn, 100,
+		2));
+	lstTests.push_back(TestData(
+		"matrix: chol, sparse      100x[6x6]", matrix_test_chol_Nx6x6_sparse,
+		100));
+	lstTests.push_back(TestData(
+		"matrix: chol, dyn[double] 120x[6x6]", matrix_test_chol_Nx6x6_dyn, 120,
+		2));
+	lstTests.push_back(TestData(
+		"matrix: chol, sparse      120x[6x6]", matrix_test_chol_Nx6x6_sparse,
+		120));
+	lstTests.push_back(TestData(
+		"matrix: chol, dyn[double] 140x[6x6]", matrix_test_chol_Nx6x6_dyn, 140,
+		2));
+	lstTests.push_back(TestData(
+		"matrix: chol, sparse      140x[6x6]", matrix_test_chol_Nx6x6_sparse,
+		140));
+
+	lstTests.push_back(TestData(
+		"matrix: loadFromArray[double] 4x4", matrix_test_loadFromArray, 1e7));
+	lstTests.push_back(TestData(
+		"matrix: load Eigen::Map[double] 4x4", matrix_test_loadWithEigenMap,
+		1e7));
 }
diff --git a/libs/core/include/mrpt/core/SSE_macros.h b/libs/core/include/mrpt/core/SSE_macros.h
@@ -13,6 +13,6 @@
 #define BUILD_128BIT_CONST(                                                 \
 	_name, B0, B1, B2, B3, B4, B5, B6, B7, B8, B9, B10, B11, B12, B13, B14, \
 	B15)                                                                    \
-	alignas(16) const unsigned long long _name[2] = {                       \
+	alignas(MRPT_MAX_ALIGN_BYTES) const unsigned long long _name[2] = {                       \
 		0x##B7##B6##B5##B4##B3##B2##B1##B0##ull,                            \
 		0x##B15##B14##B13##B12##B11##B10##B9##B8##ull};
diff --git a/libs/img/src/CImage_SSE2.cpp b/libs/img/src/CImage_SSE2.cpp
@@ -76,7 +76,7 @@ void image_SSE2_scale_half_1c8u(const uint8_t* in, uint8_t* out, int w, int h)
 void image_SSE2_scale_half_smooth_1c8u(
 	const uint8_t* in, uint8_t* out, int w, int h)
 {
-	alignas(16) const unsigned long long mask[2] = {0x00FF00FF00FF00FFull,
+	alignas(MRPT_MAX_ALIGN_BYTES) const unsigned long long mask[2] = {0x00FF00FF00FF00FFull,
 													0x00FF00FF00FF00FFull};
 	const uint8_t* nextRow = in + w;
 	__m128i m = _mm_load_si128((const __m128i*)mask);

diff --git a/libs/img/src/CImage_SSE3.cpp b/libs/img/src/CImage_SSE3.cpp
@@ -39,15 +39,15 @@
  */
 void image_SSSE3_scale_half_3c8u(const uint8_t* in, uint8_t* out, int w, int h)
 {
-	alignas(16) const unsigned long long mask0[2] = {
+	alignas(MRPT_MAX_ALIGN_BYTES) const unsigned long long mask0[2] = {
 		0x0D0C080706020100ull, 0x808080808080800Eull};  // Long words are in
 	// inverse order due to
 	// little endianness
-	alignas(16) const unsigned long long mask1[2] = {0x8080808080808080ull,
+	alignas(MRPT_MAX_ALIGN_BYTES) const unsigned long long mask1[2] = {0x8080808080808080ull,
 													 0x0E0A090804030280ull};
-	alignas(16) const unsigned long long mask2[2] = {0x0C0B0A0605040080ull,
+	alignas(MRPT_MAX_ALIGN_BYTES) const unsigned long long mask2[2] = {0x0C0B0A0605040080ull,
 													 0x8080808080808080ull};
-	alignas(16) const unsigned long long mask3[2] = {0x808080808080800Full,
+	alignas(MRPT_MAX_ALIGN_BYTES) const unsigned long long mask3[2] = {0x808080808080800Full,
 													 0x8080808080808080ull};
 
 	const __m128i m0 = _mm_load_si128((const __m128i*)mask0);

diff --git a/libs/maps/src/maps/CPointsMap.cpp b/libs/maps/src/maps/CPointsMap.cpp
@@ -404,7 +404,7 @@ void CPointsMap::determineMatching2D(
 	}
 
 	// Recover the min/max:
-	alignas(16) float temp_nums[4];
+	alignas(MRPT_MAX_ALIGN_BYTES) float temp_nums[4];
 
 	_mm_store_ps(temp_nums, x_mins);
 	local_x_min =
@@ -996,7 +996,7 @@ void CPointsMap::boundingBox(
 			}
 
 			// Recover the min/max:
-			alignas(16) float temp_nums[4];
+			alignas(MRPT_MAX_ALIGN_BYTES) float temp_nums[4];
 
 			_mm_store_ps(temp_nums, x_mins);
 			m_bb_min_x =

diff --git a/libs/math/src/lightweight_geom_data.cpp b/libs/math/src/lightweight_geom_data.cpp
@@ -247,7 +247,7 @@ void TPose3D::getAsQuaternion(
 	//   http://www.mrpt.org/6D_poses:equivalences_compositions_and_uncertainty
 	if (out_dq_dr)
 	{
-		alignas(16) const double nums[4 * 3] = {
+		alignas(MRPT_MAX_ALIGN_BYTES) const double nums[4 * 3] = {
 			-0.5 * q[3], 0.5 * (-csc + scs), -0.5 * q[1],
 			-0.5 * q[2], 0.5 * (-ssc - ccs), 0.5 * q[0],
 			0.5 * q[1],  0.5 * (ccc - sss),  0.5 * q[3],
@@ -286,7 +286,7 @@ void TPose3D::getRotationMatrix(mrpt::math::CMatrixDouble33& R) const
 	const double cr = cos(roll);
 	const double sr = sin(roll);
 
-	alignas(16) const double rot_vals[] = {cy * cp,
+	alignas(MRPT_MAX_ALIGN_BYTES) const double rot_vals[] = {cy * cp,
 										   cy * sp * sr - sy * cr,
 										   cy * sp * cr + sy * sr,
 										   sy * cp,

diff --git a/libs/math/src/matrix_ops5_unittest.cpp b/libs/math/src/matrix_ops5_unittest.cpp
@@ -22,7 +22,7 @@ using namespace std;
 
 TEST(Matrices, loadFromArray)
 {
-	alignas(16)
+	alignas(MRPT_MAX_ALIGN_BYTES)
 		const double nums[3 * 4] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
 
 	CMatrixFixedNumeric<double, 3, 4> mat;
@@ -34,11 +34,11 @@ TEST(Matrices, loadFromArray)
 
 TEST(Matrices, CMatrixFixedNumeric_loadWithEigenMap)
 {
-	alignas(16) double nums[3 * 4] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+	alignas(32) double nums[3 * 4] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
 
 	// Row major
 	const CMatrixFixedNumeric<double, 3, 4> mat =
-		Eigen::Map<CMatrixFixedNumeric<double, 3, 4>::Base, Eigen::Aligned>(
+		Eigen::Map<CMatrixFixedNumeric<double, 3, 4>::Base, Eigen::Aligned32>(
 			nums);
 
 	for (int r = 0; r < 3; r++)
@@ -47,10 +47,10 @@ TEST(Matrices, CMatrixFixedNumeric_loadWithEigenMap)
 
 TEST(Matrices, EigenMatrix_loadWithEigenMap)
 {
-	alignas(16) double nums[3 * 4] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+	alignas(32) double nums[3 * 4] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
 	// Col major
 	const Eigen::Matrix<double, 3, 4> mat =
-		Eigen::Map<Eigen::Matrix<double, 3, 4>, Eigen::Aligned>(nums);
+		Eigen::Map<Eigen::Matrix<double, 3, 4>, Eigen::Aligned32>(nums);
 
 	for (int r = 0; r < 3; r++)  // Transposed!!
 		for (int c = 0; c < 4; c++) EXPECT_EQ(nums[3 * c + r], mat(r, c));

diff --git a/libs/obs/include/mrpt/obs/CObservation3DRangeScan_project3D_impl.h b/libs/obs/include/mrpt/obs/CObservation3DRangeScan_project3D_impl.h
@@ -385,7 +385,7 @@ inline void do_project_3d_pointcloud_SSE2(
 	// Use optimized version:
 	const int W_4 = W >> 2;  // /=4 , since we process 4 values at a time.
 	size_t idx = 0;
-	alignas(16) float xs[4], ys[4], zs[4];
+	alignas(MRPT_MAX_ALIGN_BYTES) float xs[4], ys[4], zs[4];
 	const __m128 D_zeros = _mm_set_ps(.0f, .0f, .0f, .0f);
 	const __m128 xormask =
 		(filterParams.rangeCheckBetween)

diff --git a/libs/poses/src/CPose3D.cpp b/libs/poses/src/CPose3D.cpp
@@ -273,7 +273,7 @@ void CPose3D::rebuildRotationMatrix()
 	const double sr = sin(m_roll);
 #endif
 
-	alignas(16) const double rot_vals[] = {cy * cp,
+	alignas(MRPT_MAX_ALIGN_BYTES) const double rot_vals[] = {cy * cp,
 										   cy * sp * sr - sy * cr,
 										   cy * sp * cr + sy * sr,
 										   sy * cp,
@@ -392,7 +392,7 @@ void CPose3D::composePoint(
 		if (use_small_rot_approx)
 		{
 			// Linearized Jacobians around (yaw,pitch,roll)=(0,0,0):
-			alignas(16) const double nums[3 * 6] = {
+			alignas(MRPT_MAX_ALIGN_BYTES) const double nums[3 * 6] = {
 				1, 0, 0, -ly, lz, 0, 0, 1, 0, lx, 0, -lz, 0, 0, 1, 0, -lx, ly};
 			out_jacobian_df_dpose->loadFromArray(nums);
 		}
@@ -416,7 +416,7 @@ void CPose3D::composePoint(
 			const double sr = sin(m_roll);
 #endif
 
-			alignas(16) const double nums[3 * 6] = {
+			alignas(MRPT_MAX_ALIGN_BYTES) const double nums[3 * 6] = {
 				1, 0, 0,
 				-lx * sy * cp + ly * (-sy * sp * sr - cy * cr) +
 					lz * (-sy * sp * cr + cy * sr),  // d_x'/d_yaw
@@ -449,7 +449,7 @@ void CPose3D::composePoint(
 	// Jacob: df/dse3
 	if (out_jacobian_df_dse3)
 	{
-		alignas(16) const double nums[3 * 6] = {
+		alignas(MRPT_MAX_ALIGN_BYTES) const double nums[3 * 6] = {
 			1, 0, 0, 0, gz, -gy, 0, 1, 0, -gz, 0, gx, 0, 0, 1, gy, -gx, 0};
 		out_jacobian_df_dse3->loadFromArray(nums);
 	}
@@ -717,7 +717,7 @@ void CPose3D::inverseComposePoint(
 		const double Ay = gy - m_coords[1];
 		const double Az = gz - m_coords[2];
 
-		alignas(16) const double nums[3 * 6] = {
+		alignas(MRPT_MAX_ALIGN_BYTES) const double nums[3 * 6] = {
 			-m_ROT(0, 0),
 			-m_ROT(1, 0),
 			-m_ROT(2, 0),
@@ -749,7 +749,7 @@ void CPose3D::inverseComposePoint(
 	// Jacob: df/dse3
 	if (out_jacobian_df_dse3)
 	{
-		alignas(16) const double nums[3 * 6] = {
+		alignas(MRPT_MAX_ALIGN_BYTES) const double nums[3 * 6] = {
 			-1, 0, 0, 0, -lz, ly, 0, -1, 0, lz, 0, -lx, 0, 0, -1, -ly, lx, 0};
 		out_jacobian_df_dse3->loadFromArray(nums);
 	}
@@ -821,7 +821,7 @@ inline void deltaR(const MAT33& R, VEC3& v)
 template <typename VEC3, typename MAT3x3, typename MAT3x9>
 inline void M3x9(const VEC3& a, const MAT3x3& B, MAT3x9& RES)
 {
-	alignas(16) const double vals[] = {
+	alignas(MRPT_MAX_ALIGN_BYTES) const double vals[] = {
 		a[0],	 -B(0, 2), B(0, 1),  B(0, 2),  a[0],	-B(0, 0), -B(0, 1),
 		B(0, 0),  a[0],		a[1],	 -B(1, 2), B(1, 1), B(1, 2),  a[1],
 		-B(1, 0), -B(1, 1), B(1, 0),  a[1],		a[2],	-B(2, 2), B(2, 1),
@@ -840,7 +840,7 @@ inline CMatrixDouble33 ddeltaRt_dR(const CPose3D& P)
 	double b = abc[1];
 	double c = abc[2];
 
-	alignas(16) const double vals[] = {
+	alignas(MRPT_MAX_ALIGN_BYTES) const double vals[] = {
 		-b * t[1] - c * t[2],	 2 * b * t[0] - a * t[1],
 		2 * c * t[0] - a * t[2],  -b * t[0] + 2 * a * t[1],
 		-a * t[0] - c * t[2],	 2 * c * t[1] - b * t[2],