Skip to content

Commit

Permalink
align -> MRPT_MAX_ALIGN_BYTES
Browse files Browse the repository at this point in the history
  • Loading branch information
jlblancoc committed Mar 15, 2018
1 parent f4f3c8b commit f91a54b
Show file tree
Hide file tree
Showing 17 changed files with 106 additions and 133 deletions.
143 changes: 58 additions & 85 deletions apps/mrpt-performance/perf-matrix2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@ double matrix_test_chol_fix(int a1, int a2)
{
CMatrixFixedNumeric<T, DIM1, DIM1> A =
getRandomGenerator()
.drawDefinitePositiveMatrix<CMatrixFixedNumeric<T, DIM1, DIM1>,
Eigen::MatrixXd>(DIM1, 0.2);
.drawDefinitePositiveMatrix<
CMatrixFixedNumeric<T, DIM1, DIM1>, Eigen::MatrixXd>(DIM1, 0.2);
CMatrixFixedNumeric<T, DIM1, DIM1> chol_U;

const long N = 100;
Expand Down Expand Up @@ -126,8 +126,8 @@ double matrix_test_chol_Nx6x6_sparse(int DIM, int a2)

double matrix_test_loadFromArray(int N, int a2)
{
alignas(16) double nums[4 * 4] = {0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15};
alignas(MRPT_MAX_ALIGN_BYTES) double nums[4 * 4] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};

CMatrixFixedNumeric<double, 4, 4> M;

Expand All @@ -144,7 +144,7 @@ double matrix_test_loadWithEigenMap(int N, int a2)
CMatrixFixedNumeric<double, 4, 4> M;

CTicTac tictac;
M = Eigen::Map<CMatrixFixedNumeric<double, 4, 4>::Base, Eigen::Aligned>(
M = Eigen::Map<CMatrixFixedNumeric<double, 4, 4>::Base, Eigen::Aligned16>(
nums);
const double t = tictac.Tac();
dummy_do_nothing_with_string(mrpt::format("%e", M(0, 0)));
Expand All @@ -156,84 +156,57 @@ double matrix_test_loadWithEigenMap(int N, int a2)
// ------------------------------------------------------
void register_tests_matrices2()
{
lstTests.push_back(
TestData(
"matrix: chol, dyn[double] 4x4", matrix_test_chol_dyn<double, 4>));
lstTests.push_back(
TestData(
"matrix: chol, fix[double] 4x4", matrix_test_chol_fix<double, 4>));
lstTests.push_back(
TestData(
"matrix: chol, dyn[double] 40x40",
matrix_test_chol_dyn<double, 40>));
lstTests.push_back(
TestData(
"matrix: chol, fix[double] 40x40",
matrix_test_chol_fix<double, 40>));

lstTests.push_back(
TestData(
"matrix: chol, sparse [2x2;2x2]", matrix_test_chol_sparse<2, 2>));
lstTests.push_back(
TestData(
"matrix: chol, sparse [30x30;10x10]",
matrix_test_chol_sparse<30, 10>));

lstTests.push_back(
TestData(
"matrix: chol, dyn[double] 10x[6x6]", matrix_test_chol_Nx6x6_dyn,
10));
lstTests.push_back(
TestData(
"matrix: chol, sparse 10x[6x6]", matrix_test_chol_Nx6x6_sparse,
10));
lstTests.push_back(
TestData(
"matrix: chol, dyn[double] 20x[6x6]", matrix_test_chol_Nx6x6_dyn,
20));
lstTests.push_back(
TestData(
"matrix: chol, sparse 20x[6x6]", matrix_test_chol_Nx6x6_sparse,
20));
lstTests.push_back(
TestData(
"matrix: chol, dyn[double] 50x[6x6]", matrix_test_chol_Nx6x6_dyn,
50));
lstTests.push_back(
TestData(
"matrix: chol, sparse 50x[6x6]", matrix_test_chol_Nx6x6_sparse,
50));
lstTests.push_back(
TestData(
"matrix: chol, dyn[double] 100x[6x6]", matrix_test_chol_Nx6x6_dyn,
100, 2));
lstTests.push_back(
TestData(
"matrix: chol, sparse 100x[6x6]",
matrix_test_chol_Nx6x6_sparse, 100));
lstTests.push_back(
TestData(
"matrix: chol, dyn[double] 120x[6x6]", matrix_test_chol_Nx6x6_dyn,
120, 2));
lstTests.push_back(
TestData(
"matrix: chol, sparse 120x[6x6]",
matrix_test_chol_Nx6x6_sparse, 120));
lstTests.push_back(
TestData(
"matrix: chol, dyn[double] 140x[6x6]", matrix_test_chol_Nx6x6_dyn,
140, 2));
lstTests.push_back(
TestData(
"matrix: chol, sparse 140x[6x6]",
matrix_test_chol_Nx6x6_sparse, 140));

lstTests.push_back(
TestData(
"matrix: loadFromArray[double] 4x4", matrix_test_loadFromArray,
1e7));
lstTests.push_back(
TestData(
"matrix: load Eigen::Map[double] 4x4", matrix_test_loadWithEigenMap,
1e7));
lstTests.push_back(TestData(
"matrix: chol, dyn[double] 4x4", matrix_test_chol_dyn<double, 4>));
lstTests.push_back(TestData(
"matrix: chol, fix[double] 4x4", matrix_test_chol_fix<double, 4>));
lstTests.push_back(TestData(
"matrix: chol, dyn[double] 40x40", matrix_test_chol_dyn<double, 40>));
lstTests.push_back(TestData(
"matrix: chol, fix[double] 40x40", matrix_test_chol_fix<double, 40>));

lstTests.push_back(TestData(
"matrix: chol, sparse [2x2;2x2]", matrix_test_chol_sparse<2, 2>));
lstTests.push_back(TestData(
"matrix: chol, sparse [30x30;10x10]", matrix_test_chol_sparse<30, 10>));

lstTests.push_back(TestData(
"matrix: chol, dyn[double] 10x[6x6]", matrix_test_chol_Nx6x6_dyn, 10));
lstTests.push_back(TestData(
"matrix: chol, sparse 10x[6x6]", matrix_test_chol_Nx6x6_sparse,
10));
lstTests.push_back(TestData(
"matrix: chol, dyn[double] 20x[6x6]", matrix_test_chol_Nx6x6_dyn, 20));
lstTests.push_back(TestData(
"matrix: chol, sparse 20x[6x6]", matrix_test_chol_Nx6x6_sparse,
20));
lstTests.push_back(TestData(
"matrix: chol, dyn[double] 50x[6x6]", matrix_test_chol_Nx6x6_dyn, 50));
lstTests.push_back(TestData(
"matrix: chol, sparse 50x[6x6]", matrix_test_chol_Nx6x6_sparse,
50));
lstTests.push_back(TestData(
"matrix: chol, dyn[double] 100x[6x6]", matrix_test_chol_Nx6x6_dyn, 100,
2));
lstTests.push_back(TestData(
"matrix: chol, sparse 100x[6x6]", matrix_test_chol_Nx6x6_sparse,
100));
lstTests.push_back(TestData(
"matrix: chol, dyn[double] 120x[6x6]", matrix_test_chol_Nx6x6_dyn, 120,
2));
lstTests.push_back(TestData(
"matrix: chol, sparse 120x[6x6]", matrix_test_chol_Nx6x6_sparse,
120));
lstTests.push_back(TestData(
"matrix: chol, dyn[double] 140x[6x6]", matrix_test_chol_Nx6x6_dyn, 140,
2));
lstTests.push_back(TestData(
"matrix: chol, sparse 140x[6x6]", matrix_test_chol_Nx6x6_sparse,
140));

lstTests.push_back(TestData(
"matrix: loadFromArray[double] 4x4", matrix_test_loadFromArray, 1e7));
lstTests.push_back(TestData(
"matrix: load Eigen::Map[double] 4x4", matrix_test_loadWithEigenMap,
1e7));
}
2 changes: 1 addition & 1 deletion libs/core/include/mrpt/core/SSE_macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,6 @@
#define BUILD_128BIT_CONST( \
_name, B0, B1, B2, B3, B4, B5, B6, B7, B8, B9, B10, B11, B12, B13, B14, \
B15) \
alignas(16) const unsigned long long _name[2] = { \
alignas(MRPT_MAX_ALIGN_BYTES) const unsigned long long _name[2] = { \
0x##B7##B6##B5##B4##B3##B2##B1##B0##ull, \
0x##B15##B14##B13##B12##B11##B10##B9##B8##ull};
2 changes: 1 addition & 1 deletion libs/img/src/CImage_SSE2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ void image_SSE2_scale_half_1c8u(const uint8_t* in, uint8_t* out, int w, int h)
void image_SSE2_scale_half_smooth_1c8u(
const uint8_t* in, uint8_t* out, int w, int h)
{
alignas(16) const unsigned long long mask[2] = {0x00FF00FF00FF00FFull,
alignas(MRPT_MAX_ALIGN_BYTES) const unsigned long long mask[2] = {0x00FF00FF00FF00FFull,
0x00FF00FF00FF00FFull};
const uint8_t* nextRow = in + w;
__m128i m = _mm_load_si128((const __m128i*)mask);
Expand Down
8 changes: 4 additions & 4 deletions libs/img/src/CImage_SSE3.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,15 @@
*/
void image_SSSE3_scale_half_3c8u(const uint8_t* in, uint8_t* out, int w, int h)
{
alignas(16) const unsigned long long mask0[2] = {
alignas(MRPT_MAX_ALIGN_BYTES) const unsigned long long mask0[2] = {
0x0D0C080706020100ull, 0x808080808080800Eull}; // Long words are in
// inverse order due to
// little endianness
alignas(16) const unsigned long long mask1[2] = {0x8080808080808080ull,
alignas(MRPT_MAX_ALIGN_BYTES) const unsigned long long mask1[2] = {0x8080808080808080ull,
0x0E0A090804030280ull};
alignas(16) const unsigned long long mask2[2] = {0x0C0B0A0605040080ull,
alignas(MRPT_MAX_ALIGN_BYTES) const unsigned long long mask2[2] = {0x0C0B0A0605040080ull,
0x8080808080808080ull};
alignas(16) const unsigned long long mask3[2] = {0x808080808080800Full,
alignas(MRPT_MAX_ALIGN_BYTES) const unsigned long long mask3[2] = {0x808080808080800Full,
0x8080808080808080ull};

const __m128i m0 = _mm_load_si128((const __m128i*)mask0);
Expand Down
4 changes: 2 additions & 2 deletions libs/maps/src/maps/CPointsMap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,7 @@ void CPointsMap::determineMatching2D(
}

// Recover the min/max:
alignas(16) float temp_nums[4];
alignas(MRPT_MAX_ALIGN_BYTES) float temp_nums[4];

_mm_store_ps(temp_nums, x_mins);
local_x_min =
Expand Down Expand Up @@ -996,7 +996,7 @@ void CPointsMap::boundingBox(
}

// Recover the min/max:
alignas(16) float temp_nums[4];
alignas(MRPT_MAX_ALIGN_BYTES) float temp_nums[4];

_mm_store_ps(temp_nums, x_mins);
m_bb_min_x =
Expand Down
4 changes: 2 additions & 2 deletions libs/math/src/lightweight_geom_data.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ void TPose3D::getAsQuaternion(
// http://www.mrpt.org/6D_poses:equivalences_compositions_and_uncertainty
if (out_dq_dr)
{
alignas(16) const double nums[4 * 3] = {
alignas(MRPT_MAX_ALIGN_BYTES) const double nums[4 * 3] = {
-0.5 * q[3], 0.5 * (-csc + scs), -0.5 * q[1],
-0.5 * q[2], 0.5 * (-ssc - ccs), 0.5 * q[0],
0.5 * q[1], 0.5 * (ccc - sss), 0.5 * q[3],
Expand Down Expand Up @@ -286,7 +286,7 @@ void TPose3D::getRotationMatrix(mrpt::math::CMatrixDouble33& R) const
const double cr = cos(roll);
const double sr = sin(roll);

alignas(16) const double rot_vals[] = {cy * cp,
alignas(MRPT_MAX_ALIGN_BYTES) const double rot_vals[] = {cy * cp,
cy * sp * sr - sy * cr,
cy * sp * cr + sy * sr,
sy * cp,
Expand Down
10 changes: 5 additions & 5 deletions libs/math/src/matrix_ops5_unittest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ using namespace std;

TEST(Matrices, loadFromArray)
{
alignas(16)
alignas(MRPT_MAX_ALIGN_BYTES)
const double nums[3 * 4] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};

CMatrixFixedNumeric<double, 3, 4> mat;
Expand All @@ -34,11 +34,11 @@ TEST(Matrices, loadFromArray)

TEST(Matrices, CMatrixFixedNumeric_loadWithEigenMap)
{
alignas(16) double nums[3 * 4] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
alignas(32) double nums[3 * 4] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};

// Row major
const CMatrixFixedNumeric<double, 3, 4> mat =
Eigen::Map<CMatrixFixedNumeric<double, 3, 4>::Base, Eigen::Aligned>(
Eigen::Map<CMatrixFixedNumeric<double, 3, 4>::Base, Eigen::Aligned32>(
nums);

for (int r = 0; r < 3; r++)
Expand All @@ -47,10 +47,10 @@ TEST(Matrices, CMatrixFixedNumeric_loadWithEigenMap)

TEST(Matrices, EigenMatrix_loadWithEigenMap)
{
alignas(16) double nums[3 * 4] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
alignas(32) double nums[3 * 4] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
// Col major
const Eigen::Matrix<double, 3, 4> mat =
Eigen::Map<Eigen::Matrix<double, 3, 4>, Eigen::Aligned>(nums);
Eigen::Map<Eigen::Matrix<double, 3, 4>, Eigen::Aligned32>(nums);

for (int r = 0; r < 3; r++) // Transposed!!
for (int c = 0; c < 4; c++) EXPECT_EQ(nums[3 * c + r], mat(r, c));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,7 @@ inline void do_project_3d_pointcloud_SSE2(
// Use optimized version:
const int W_4 = W >> 2; // /=4 , since we process 4 values at a time.
size_t idx = 0;
alignas(16) float xs[4], ys[4], zs[4];
alignas(MRPT_MAX_ALIGN_BYTES) float xs[4], ys[4], zs[4];
const __m128 D_zeros = _mm_set_ps(.0f, .0f, .0f, .0f);
const __m128 xormask =
(filterParams.rangeCheckBetween)
Expand Down
16 changes: 8 additions & 8 deletions libs/poses/src/CPose3D.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ void CPose3D::rebuildRotationMatrix()
const double sr = sin(m_roll);
#endif

alignas(16) const double rot_vals[] = {cy * cp,
alignas(MRPT_MAX_ALIGN_BYTES) const double rot_vals[] = {cy * cp,
cy * sp * sr - sy * cr,
cy * sp * cr + sy * sr,
sy * cp,
Expand Down Expand Up @@ -392,7 +392,7 @@ void CPose3D::composePoint(
if (use_small_rot_approx)
{
// Linearized Jacobians around (yaw,pitch,roll)=(0,0,0):
alignas(16) const double nums[3 * 6] = {
alignas(MRPT_MAX_ALIGN_BYTES) const double nums[3 * 6] = {
1, 0, 0, -ly, lz, 0, 0, 1, 0, lx, 0, -lz, 0, 0, 1, 0, -lx, ly};
out_jacobian_df_dpose->loadFromArray(nums);
}
Expand All @@ -416,7 +416,7 @@ void CPose3D::composePoint(
const double sr = sin(m_roll);
#endif

alignas(16) const double nums[3 * 6] = {
alignas(MRPT_MAX_ALIGN_BYTES) const double nums[3 * 6] = {
1, 0, 0,
-lx * sy * cp + ly * (-sy * sp * sr - cy * cr) +
lz * (-sy * sp * cr + cy * sr), // d_x'/d_yaw
Expand Down Expand Up @@ -449,7 +449,7 @@ void CPose3D::composePoint(
// Jacob: df/dse3
if (out_jacobian_df_dse3)
{
alignas(16) const double nums[3 * 6] = {
alignas(MRPT_MAX_ALIGN_BYTES) const double nums[3 * 6] = {
1, 0, 0, 0, gz, -gy, 0, 1, 0, -gz, 0, gx, 0, 0, 1, gy, -gx, 0};
out_jacobian_df_dse3->loadFromArray(nums);
}
Expand Down Expand Up @@ -717,7 +717,7 @@ void CPose3D::inverseComposePoint(
const double Ay = gy - m_coords[1];
const double Az = gz - m_coords[2];

alignas(16) const double nums[3 * 6] = {
alignas(MRPT_MAX_ALIGN_BYTES) const double nums[3 * 6] = {
-m_ROT(0, 0),
-m_ROT(1, 0),
-m_ROT(2, 0),
Expand Down Expand Up @@ -749,7 +749,7 @@ void CPose3D::inverseComposePoint(
// Jacob: df/dse3
if (out_jacobian_df_dse3)
{
alignas(16) const double nums[3 * 6] = {
alignas(MRPT_MAX_ALIGN_BYTES) const double nums[3 * 6] = {
-1, 0, 0, 0, -lz, ly, 0, -1, 0, lz, 0, -lx, 0, 0, -1, -ly, lx, 0};
out_jacobian_df_dse3->loadFromArray(nums);
}
Expand Down Expand Up @@ -821,7 +821,7 @@ inline void deltaR(const MAT33& R, VEC3& v)
template <typename VEC3, typename MAT3x3, typename MAT3x9>
inline void M3x9(const VEC3& a, const MAT3x3& B, MAT3x9& RES)
{
alignas(16) const double vals[] = {
alignas(MRPT_MAX_ALIGN_BYTES) const double vals[] = {
a[0], -B(0, 2), B(0, 1), B(0, 2), a[0], -B(0, 0), -B(0, 1),
B(0, 0), a[0], a[1], -B(1, 2), B(1, 1), B(1, 2), a[1],
-B(1, 0), -B(1, 1), B(1, 0), a[1], a[2], -B(2, 2), B(2, 1),
Expand All @@ -840,7 +840,7 @@ inline CMatrixDouble33 ddeltaRt_dR(const CPose3D& P)
double b = abc[1];
double c = abc[2];

alignas(16) const double vals[] = {
alignas(MRPT_MAX_ALIGN_BYTES) const double vals[] = {
-b * t[1] - c * t[2], 2 * b * t[0] - a * t[1],
2 * c * t[0] - a * t[2], -b * t[0] + 2 * a * t[1],
-a * t[0] - c * t[2], 2 * c * t[1] - b * t[2],
Expand Down

0 comments on commit f91a54b

Please sign in to comment.