Skip to content

Commit

Permalink
Added file with inline functions
Browse files Browse the repository at this point in the history
  • Loading branch information
Beyamor committed Aug 11, 2012
1 parent 7d975a8 commit 76479e5
Show file tree
Hide file tree
Showing 2 changed files with 294 additions and 2 deletions.
10 changes: 8 additions & 2 deletions Makefile
Expand Up @@ -16,9 +16,12 @@ FIXED_OP = $(BINDIR)/fixedpoint_optimized
MORE_OP_OBJS = $(SRCDIR)/more_optimized.o
MORE_OP = $(BINDIR)/more_optimized

ALL_OBJS = $(UNOP_OBJS) $(TRIG_OP_OBJS) $(FIXED_OP_OBJS) $(MORE_OP_OBJS)
INLINE_MORE_OP_OBJS = $(SRCDIR)/inline_more_optimized.o
INLINE_MORE_OP = $(BINDIR)/inline_more_optimized

all: unoptimized linear_trig_optimized fixedpoint_optimized more_optimized
ALL_OBJS = $(UNOP_OBJS) $(TRIG_OP_OBJS) $(FIXED_OP_OBJS) $(MORE_OP_OBJS) $(INLINE_MORE_OP_OBJS)

all: unoptimized linear_trig_optimized fixedpoint_optimized more_optimized inline_more_optimized

unoptimized: $(UNOP_OBJS)
$(CC) $(CFLAGS) -o $(UNOP) $(UNOP_OBJS) -l$(CLIBS)
Expand All @@ -32,5 +35,8 @@ fixedpoint_optimized: $(FIXED_OP_OBJS)
more_optimized: $(MORE_OP_OBJS)
$(CC) $(CFLAGS) -o $(MORE_OP) $(MORE_OP_OBJS) -l$(CLIBS)

inline_more_optimized: $(INLINE_MORE_OP_OBJS)
$(CC) $(CFLAGS) -o $(INLINE_MORE_OP) $(INLINE_MORE_OP_OBJS) -l$(CLIBS)

clean:
rm -f $(ALL_OBJS) $(BINDIR)/*
286 changes: 286 additions & 0 deletions src/inline_more_optimized.c
@@ -0,0 +1,286 @@
#include <stdlib.h>
#include <math.h>
#include <stdio.h>

/*
 * Project specifications
 */
#define N 4 /* all matrices in this file are N x N */

/*
 * Fixed-point format: real values are stored as integers multiplied by
 * SCALE, where SCALE = 2^SCALE_BITS = 2^12 = 4096.
 * This value is multiplied with the input (max 2^15) in a 32-bit int, but
 * there was little accuracy difference between 2^12 and 2^14 for the scale
 * (about +/- 0.2%)
 */
#define SCALE 4096
#define SCALE_OVER_2 2048
#define SCALE_BITS 12

/* pi and pi/2 expressed in the fixed-point format (pi * SCALE, rounded) */
#define PI 12868
#define PI_OVER_2 6434

/*
 * Breakpoint angle (fixed point) at which cos_fixed and sin_fixed switch
 * between their inner and outer linear segments.
 */
#define POINT 3317

/*
 * Constant terms of the outer linear segments.
 * Negative replacement lists are parenthesized so the macro stays a single
 * operand wherever it is expanded.
 */
#define COS_OFFSET 6257
#define SIN_OFFSET (-2438)
#define TAN_OFFSET 582

/* Slopes (scaled by SCALE) of the outer / inner sine segments */
#define SIN_OUT_MULT 1056
#define SIN_IN_MULT 3983

/* Slopes (scaled by SCALE) of the outer / inner arctangent segments */
#define TAN_OUT_MULT 2638
#define TAN_IN_MULT 3801

/* Slopes (scaled by SCALE) of the outer / inner cosine segments */
#define COS_OUT_MULT 3983
#define COS_IN_MULT 1056


/**
 * Prints an N x N matrix to stdout, one row per line.
 *
 * Elements within a row are separated by ",\t". Non-negative values are
 * prefixed with a single space so they line up with negative values, whose
 * minus sign occupies that column.
 *
 * matrix: the matrix to print (raw integer contents are printed as-is)
 */
void printMatrix( short matrix[N][N] ) {
    int row;
    int col;

    for (row = 0; row < N; row++) {
        for (col = 0; col < N; col++) {
            const short value = matrix[row][col];
            const char *pad = " ";
            const char *separator = ",\t";

            if (value < 0) {
                pad = "";
            }
            if (col == N - 1) {
                /* last column: terminate the row instead of separating */
                separator = "\n";
            }

            printf("%s%d%s", pad, value, separator);
        }
    }
}


/**
 * Piecewise-linear cosine approximation in fixed point.
 *
 * Four linear segments are used, split at -POINT, 0 and POINT:
 *   angle <  -POINT      : COS_OFFSET + COS_OUT_MULT * angle
 *   -POINT <= angle < 0  : SCALE      + COS_IN_MULT  * angle
 *   0 <= angle < POINT   : SCALE      - COS_IN_MULT  * angle
 *   POINT <= angle < PI  : COS_OFFSET - COS_OUT_MULT * angle
 * (each product is shifted right by SCALE_BITS to undo the slope scaling).
 *
 * angle:   angle in fixed point (radians * SCALE)
 * returns: approximate cos(angle) * SCALE; 0 for angle >= PI
 *
 * NOTE: negative products are right-shifted; this relies on the compiler
 * implementing >> on negative ints as an arithmetic shift.
 */
static inline short cos_fixed(short angle){
    if (angle >= PI) {
        /* outside every segment */
        return 0;
    }

    if (angle >= POINT) {
        const int outer = (COS_OUT_MULT * angle) >> SCALE_BITS;
        return (short)(COS_OFFSET - outer);
    }

    if (angle >= 0) {
        const int inner = (COS_IN_MULT * angle) >> SCALE_BITS;
        return (short)(SCALE - inner);
    }

    if (angle >= -POINT) {
        const int inner = (COS_IN_MULT * angle) >> SCALE_BITS;
        return (short)(SCALE + inner);
    }

    return (short)(COS_OFFSET + ((COS_OUT_MULT * angle) >> SCALE_BITS));
}


/**
 * Piecewise-linear sine approximation in fixed point.
 *
 * Three linear segments are used, split at -POINT and POINT:
 *   angle < -POINT              : SIN_OFFSET + SIN_OUT_MULT * angle
 *   -POINT <= angle < POINT     :              SIN_IN_MULT  * angle
 *   POINT <= angle <= PI_OVER_2 : -SIN_OFFSET + SIN_OUT_MULT * angle
 * (each product is shifted right by SCALE_BITS to undo the slope scaling).
 *
 * The positive outer segment is the mirror image of the negative one
 * (sin is odd), so its constant term is -SIN_OFFSET. Adding SIN_OFFSET there,
 * as the code previously did, yields negative results for angles near pi/2.
 * The upper bound is inclusive so that exactly pi/2 maps to SCALE rather
 * than falling through to 0.
 *
 * angle:   angle in fixed point (radians * SCALE)
 * returns: approximate sin(angle) * SCALE; 0 for angle > PI_OVER_2
 *
 * NOTE: negative products are right-shifted; this relies on the compiler
 * implementing >> on negative ints as an arithmetic shift.
 */
static inline short sin_fixed(short angle) {
    int result = 0;

    if (angle < -POINT) {
        result = SIN_OFFSET + ((SIN_OUT_MULT * angle) >> SCALE_BITS);
    }
    else if (angle < POINT) {
        result = (SIN_IN_MULT * angle) >> SCALE_BITS;
    }
    else if (angle <= PI_OVER_2) {
        /* mirrored outer segment: subtract the (negative) offset */
        result = ((SIN_OUT_MULT * angle) >> SCALE_BITS) - (SIN_OFFSET);
    }

    return (short)result;
}


/**
 * Piecewise-linear approximation of atan(y / x) in fixed point.
 *
 * The ratio is always formed with the larger-magnitude operand as the
 * divisor, so it lies in [-SCALE, SCALE] and the divisor is never zero.
 * When |y| > |x| the ratio is x / y (the cotangent); the result is then
 * converted back with atan(1/r) = pi/2 - atan(r) for r > 0 and
 * atan(1/r) = -pi/2 - atan(r) for r < 0.
 *
 * This is a plain arctangent of the quotient, not a quadrant-aware atan2.
 *
 * y, x:    numerator and denominator (any common scale)
 * returns: approximate atan(y / x) * SCALE. If both inputs are 0, a
 *          diagnostic is printed and 0 is returned.
 *
 * NOTE: negative products are right-shifted; this relies on the compiler
 * implementing >> on negative ints as an arithmetic shift.
 */
static inline short atan_fixed(short y, short x)
{
    if(x == 0 && y == 0)
    {
        printf("This should never happen\n");
        return 0;
    }

    /* magnitudes are taken as int so that -(-32768) is representable */
    int absY = (y < 0) ? -y : y;
    int absX = (x < 0) ? -x : x;
    char isCot = absY > absX;

    short result = 0;

    /*
     * Store the ratio in 32 bits to avoid truncation errors, scaled up by
     * SCALE (shifted back down below). Multiplication is used instead of
     * a left shift because left-shifting a negative value is undefined.
     */
    int angle = isCot ? (x * SCALE) / y : (y * SCALE) / x;

    if(angle > SCALE_OVER_2 && angle <= (SCALE))
    {
        result = ((TAN_OUT_MULT * angle) >> SCALE_BITS) + TAN_OFFSET;
    }
    else if(angle <= SCALE_OVER_2 && angle >= -SCALE_OVER_2)
    {
        result = (TAN_IN_MULT * angle) >> SCALE_BITS;
    }
    else if(angle < -SCALE_OVER_2 && angle >= -SCALE)
    {
        result = ((TAN_OUT_MULT * angle) >> SCALE_BITS) - TAN_OFFSET;
    }

    if(isCot)
    {
        if(angle < 0)
        {
            /* negative ratio: shift by pi so the final result is -pi/2 - atan(r) */
            result += PI;
        }
        result = (PI_OVER_2) - result;
    }
    return result;
}

/*
 * Fully unrolled 4x4 fixed-point matrix product.
 *
 * These macros expand to plain statements and expect the following names
 * to be in scope at the point of use:
 *   m1, m2       - input matrices as flat row-major arrays of short
 *   target       - output matrix as a flat row-major array of short
 *   temp32       - int scratch variable for the 32-bit product
 *   targetIndex  - int scratch variable holding the current output index
 *
 * All macro arguments are parenthesized so that expression arguments
 * (e.g. row + 1) index correctly.
 */

/* Selects output element (i,j) and clears it. */
#define MULTIPLICATION_STEP_INITIALIZATION(i,j) \
    targetIndex = (i)*N + (j); \
    target[targetIndex] = 0;

/* Adds the k-th term m1[i][k] * m2[k][j] to the selected output element. */
#define MULTIPLICATION_STEP(i,j,k) \
    temp32 = m1[(i)*N + (k)] * m2[(k)*N + (j)]; \
    target[targetIndex] += (short)(temp32 >> SCALE_BITS); /* bitshift 12 is the same as division by scale */

/* Computes output element (i,j) as the sum of its four terms. */
#define MULTIPLICATION_STEPS_FOR_IJ(i,j) \
    MULTIPLICATION_STEP_INITIALIZATION(i,j) \
    MULTIPLICATION_STEP(i,j,0) \
    MULTIPLICATION_STEP(i,j,1) \
    MULTIPLICATION_STEP(i,j,2) \
    MULTIPLICATION_STEP(i,j,3)

/* Computes the four output elements of row i. */
#define MULTIPLICATION_STEPS_FOR_I(i) \
    MULTIPLICATION_STEPS_FOR_IJ(i,0) \
    MULTIPLICATION_STEPS_FOR_IJ(i,1) \
    MULTIPLICATION_STEPS_FOR_IJ(i,2) \
    MULTIPLICATION_STEPS_FOR_IJ(i,3)

/* Computes all sixteen output elements. */
#define MULTIPLICATION_STEPS() \
    MULTIPLICATION_STEPS_FOR_I(0) \
    MULTIPLICATION_STEPS_FOR_I(1) \
    MULTIPLICATION_STEPS_FOR_I(2) \
    MULTIPLICATION_STEPS_FOR_I(3)

/**
 * Multiplies a 4x4 matrix with a 4x4 matrix, storing output in a 4x4 matrix.
 *
 * All three matrices are flat, row-major arrays of 16 fixed-point shorts.
 * Each 32-bit product is shifted right by SCALE_BITS before being
 * accumulated, so the result stays in the same fixed-point format.
 * The product is fully unrolled by MULTIPLICATION_STEPS(), which uses the
 * locals temp32 and targetIndex as scratch space.
 *
 * m1, m2:  left and right operands (read only)
 * target:  receives m1 * m2; must not overlap m1 or m2 (restrict)
 */
static inline void multMatrix4( const short *restrict m1, const short *restrict m2, short *restrict target ) {
    int temp32 = 0, targetIndex = 0;

    MULTIPLICATION_STEPS()
}

/*
 * Sets the 4x4 matrix m to the fixed-point identity (SCALE on the diagonal,
 * 0 elsewhere). The argument is parenthesized so that expressions such as
 * a dereferenced pointer-to-matrix work as expected.
 */
#define SET_AS_IDENTITY(m) \
    (m)[0][0] = SCALE; (m)[0][1] = 0; (m)[0][2] = 0; (m)[0][3] = 0; \
    (m)[1][0] = 0; (m)[1][1] = SCALE; (m)[1][2] = 0; (m)[1][3] = 0; \
    (m)[2][0] = 0; (m)[2][1] = 0; (m)[2][2] = SCALE; (m)[2][3] = 0; \
    (m)[3][0] = 0; (m)[3][1] = 0; (m)[3][2] = 0; (m)[3][3] = SCALE;

/*
 * One two-sided rotation step on the (i,j) pair selected by ia[iter] and
 * ja[iter]: computes left and right rotation angles from the 2x2 submatrix
 * [a b; c d], builds the rotation matrices rotL and rotR, and replaces
 * matrix with rotL * matrix * rotR (via the intermediate med).
 *
 * Expands to plain statements and expects these names in scope:
 * matrix, ia, ja, i, j, a, b, c, d, thetaSum, thetaDif, thetaL, thetaR,
 * cosL, cosR, sinL, sinR, rotL, rotR, med.
 */
#define DIAGONALIZATION_ITERATION(iter) \
    i = ia[(iter)]; \
    j = ja[(iter)]; \
    \
    a = matrix[i][i]; \
    b = matrix[i][j]; \
    c = matrix[j][i]; \
    d = matrix[j][j]; \
    \
    /* calculate rotation angle */ \
    thetaSum = atan_fixed( (c+b), (d-a)); /* Equals thetaL + thetaR */ \
    thetaDif = atan_fixed( (c-b), (d+a)); /* Equals thetaR - thetaL */ \
    \
    thetaL = (thetaSum - thetaDif) >> 1; /* division by 2 */ \
    thetaR = (thetaSum + thetaDif) >> 1; \
    \
    cosL = cos_fixed(thetaL); \
    cosR = cos_fixed(thetaR); \
    sinL = sin_fixed(thetaL); \
    sinR = sin_fixed(thetaR); \
    \
    SET_AS_IDENTITY(rotR) \
    SET_AS_IDENTITY(rotL) \
    \
    rotL[i][i] = cosL; /* rotation' */ \
    rotL[i][j] = -sinL; \
    rotL[j][i] = sinL; \
    rotL[j][j] = cosL; \
    \
    rotR[i][i] = cosR; /* rotation */ \
    rotR[i][j] = sinR; \
    rotR[j][i] = -sinR; \
    rotR[j][j] = cosR; \
    \
    multMatrix4( (short*)rotL, (short*)matrix, (short*)med); \
    multMatrix4( (short*)med, (short*)rotR, (short*)matrix);

/* One sweep: a rotation step for each of the six (i,j) pairs in ia/ja. */
#define DIAGONALIZATION_CYCLE() \
    DIAGONALIZATION_ITERATION(0) \
    DIAGONALIZATION_ITERATION(1) \
    DIAGONALIZATION_ITERATION(2) \
    DIAGONALIZATION_ITERATION(3) \
    DIAGONALIZATION_ITERATION(4) \
    DIAGONALIZATION_ITERATION(5)

/**
 * Takes a square matrix and diagonalizes it.
 *
 * The matrix is modified in place. Three sweeps are performed; each sweep
 * (DIAGONALIZATION_CYCLE) applies one two-sided rotation step to each of
 * the six (i,j) index pairs listed in ia/ja, which together cover every
 * off-diagonal pair of a 4x4 matrix.
 *
 * matrix: N x N fixed-point matrix; overwritten with the result
 */
void diagonalize( short matrix[N][N] ) {

    /* (i,j) pair visited by each of the six steps of a sweep */
    const short ia[6] = {1,0,1,0,0,2};
    const short ja[6] = {2,3,3,2,1,3};

    /*
     * Other orderings noted by the original author (not in use):
     *
     * sima method
     *   i {0,2,0,1,0,1}
     *   j {1,3,2,3,3,2}
     *
     * found by testing permutations, matches wolframalpha
     *   ia = {1,0,1,0,2,0}
     *   ja = {2,3,3,2,3,1}
     */

    /* working variables used by the DIAGONALIZATION_ITERATION macro */
    short i = 0,
          j = 0,
          a = 0,
          b = 0,
          c = 0,
          d = 0,
          thetaSum = 0,
          thetaDif = 0,
          thetaL = 0,
          thetaR = 0,
          cosL = 0,
          cosR = 0,
          sinL = 0,
          sinR = 0,
          rotR[N][N],
          rotL[N][N],
          med[N][N];

    DIAGONALIZATION_CYCLE()
    DIAGONALIZATION_CYCLE()
    DIAGONALIZATION_CYCLE()
}

/*
 * Demo driver: prints a fixed 4x4 sample matrix, diagonalizes it in place,
 * and prints the result.
 */
int main() {
    short sample[N][N] = {
        {  512, 1024, 1536,  512 },
        { 1024, 1536,  512, 1024 },
        { 1536,  512, 1024, 1536 },
        {  512, 1024, 1536, 2048 }
    };

    /* before */
    printMatrix( sample );

    printf("\r\n->\r\n\r\n");

    /* after */
    diagonalize( sample );
    printMatrix( sample );

    return 0;
}

0 comments on commit 76479e5

Please sign in to comment.