Skip to content

Commit

Permalink
make SSE ifdefs in matrix header
Browse files Browse the repository at this point in the history
  • Loading branch information
CarVac committed Dec 26, 2020
1 parent 069ba50 commit 6bcf758
Showing 1 changed file with 12 additions and 0 deletions.
12 changes: 12 additions & 0 deletions filmulator-gui/core/matrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@
#include <limits>
#include <algorithm>
#include "math.h"
#ifdef __SSE2__
#include <emmintrin.h>
#endif
#include <iostream>
#include <memory>
#include <omp.h>
Expand All @@ -45,13 +47,15 @@ class matrix
int num_rows;
int num_cols;
inline void slow_transpose_to(const matrix<T> &target) const;
#ifdef __SSE2__
inline void fast_transpose_to(const matrix<T> &target) const;
inline void transpose4x4_SSE(float *A, float *B, const int lda,
const int ldb) const;
inline void transpose_block_SSE4x4(float *A, float *B, const int n,
const int m, const int lda,
const int ldb,
const int block_size) const;
#endif
inline void transpose_scalar_block(float *A, float *B, const int lda,
const int ldb, const int block_size) const;
inline void transpose_block(float *A, float *B, const int n,
Expand Down Expand Up @@ -494,6 +498,7 @@ inline void matrix<T>::slow_transpose_to (const matrix<T> &target) const
data[row*num_cols + col];
}

#ifdef __SSE2__
template<>
inline void matrix<float>::fast_transpose_to (const matrix<float> &target) const
{
Expand All @@ -509,6 +514,7 @@ inline void matrix<T>::fast_transpose_to (const matrix<T> &target) const
{
slow_transpose_to(target);
}
#endif

template <class T>
inline void matrix<T>::transpose_to (const matrix<T> &target) const
Expand All @@ -519,13 +525,18 @@ inline void matrix<T>::transpose_to (const matrix<T> &target) const
// Specialization of transpose_to for float matrices: writes the transpose of
// this matrix into `target`, choosing between the SSE block transpose and the
// plain scalar transpose.
template<>
inline void matrix<float>::transpose_to (const matrix<float> &target) const
{
#ifdef __SSE2__
    // The fast (SSE) transpose only works on matrices whose row and column
    // counts are both multiples of 16; everything else falls through to the
    // slow path below.
    const bool dims_are_multiples_of_16 =
        (num_rows % 16 == 0) && (num_cols % 16 == 0);
    if (dims_are_multiples_of_16)
    {
        fast_transpose_to(target);
        return;
    }
#endif
    // Without SSE2 support this is the only path that gets compiled.
    slow_transpose_to(target);
}

#ifdef __SSE2__
template<class T>
inline void matrix<T>::transpose4x4_SSE(float *A, float *B, const int lda,
const int ldb) const
Expand Down Expand Up @@ -559,6 +570,7 @@ inline void matrix<T>::transpose_block_SSE4x4(float *A, float *B, const int n,

}
}
#endif

template<class T>
inline void matrix<T>::transpose_scalar_block(float *A, float *B, const int lda, const int ldb, const int block_size) const {
Expand Down

0 comments on commit 6bcf758

Please sign in to comment.