Permalink
Browse files

Updated scale16 and scale16by8 for AVR when FASTLED_SCALE8_FIXED is 1…

…. scale16(foo,65535) now equals foo. Updated DemoReel100 to stay within range of NUM_LEDS on non-AVR platforms. Fixes #368.
  • Loading branch information...
kriegsman committed Jan 15, 2017
1 parent d90a452 commit 94e46e3978b9f2bce8f76383ff9a19879b245ad0
Showing with 114 additions and 3 deletions.
  1. +2 −2 examples/DemoReel100/DemoReel100.ino
  2. +112 −1 lib8tion/scale8.h
@@ -99,7 +99,7 @@ void sinelon()
{
// a colored dot sweeping back and forth, with fading trails
fadeToBlackBy( leds, NUM_LEDS, 20);
int pos = beatsin16(13,0,NUM_LEDS);
int pos = beatsin16( 13, 0, NUM_LEDS-1 );
leds[pos] += CHSV( gHue, 255, 192);
}
@@ -119,7 +119,7 @@ void juggle() {
fadeToBlackBy( leds, NUM_LEDS, 20);
byte dothue = 0;
for( int i = 0; i < 8; i++) {
leds[beatsin16(i+7,0,NUM_LEDS)] |= CHSV(dothue, 200, 255);
leds[beatsin16( i+7, 0, NUM_LEDS-1 )] |= CHSV(dothue, 200, 255);
dothue += 32;
}
}
View
@@ -424,6 +424,34 @@ LIB8STATIC_ALWAYS_INLINE uint16_t scale16by8( uint16_t i, fract8 scale )
#endif
return result;
#elif SCALE16BY8_AVRASM == 1
#if FASTLED_SCALE8_FIXED == 1
uint16_t result = 0;
asm volatile(
// result.A = HighByte( (i.A x scale) + i.A )
" mul %A[i], %[scale] \n\t"
" add r0, %A[i] \n\t"
// " adc r1, [zero] \n\t"
// " mov %A[result], r1 \n\t"
" adc %A[result], r1 \n\t"
// result.A-B += i.B x scale
" mul %B[i], %[scale] \n\t"
" add %A[result], r0 \n\t"
" adc %B[result], r1 \n\t"
// cleanup r1
" clr __zero_reg__ \n\t"
// result.A-B += i.B
" add %A[result], %B[i] \n\t"
" adc %B[result], __zero_reg__ \n\t"
: [result] "+r" (result)
: [i] "r" (i), [scale] "r" (scale)
: "r0", "r1"
);
return result;
#else
uint16_t result = 0;
asm volatile(
// result.A = HighByte(i.A x j )
@@ -444,6 +472,7 @@ LIB8STATIC_ALWAYS_INLINE uint16_t scale16by8( uint16_t i, fract8 scale )
: "r0", "r1"
);
return result;
#endif
#else
#error "No implementation for scale16by8 available."
#endif
@@ -464,6 +493,14 @@ LIB8STATIC uint16_t scale16( uint16_t i, fract16 scale )
#endif
return result;
#elif SCALE16_AVRASM == 1
#if FASTLED_SCALE8_FIXED == 1
// implemented sort of like
// result = ((i * scale) + i ) / 65536
//
// why not like this, you may ask?
// result = (i * (scale+1)) / 65536
// the answer is that if scale is 65535, then scale+1
// will be zero, which is not what we want.
uint32_t result;
asm volatile(
// result.A-B = i.A x scale.A
@@ -474,7 +511,80 @@ LIB8STATIC uint16_t scale16( uint16_t i, fract16 scale )
//" mov %B[result], r1 \n\t"
// which can be written as...
" movw %A[result], r0 \n\t"
// We actually need to do anything with r0,
// Because we're going to add i.A-B to
// result.A-D, we DO need to keep both
// the r0 and r1 portions of the product
// UNlike in the 'unfixed scale8' version.
// So the movw here is needed.
: [result] "=r" (result)
: [i] "r" (i),
[scale] "r" (scale)
: "r0", "r1"
);
asm volatile(
// result.C-D = i.B x scale.B
" mul %B[i], %B[scale] \n\t"
//" mov %C[result], r0 \n\t"
//" mov %D[result], r1 \n\t"
" movw %C[result], r0 \n\t"
: [result] "+r" (result)
: [i] "r" (i),
[scale] "r" (scale)
: "r0", "r1"
);
const uint8_t zero = 0;
asm volatile(
// result.B-D += i.B x scale.A
" mul %B[i], %A[scale] \n\t"
" add %B[result], r0 \n\t"
" adc %C[result], r1 \n\t"
" adc %D[result], %[zero] \n\t"
// result.B-D += i.A x scale.B
" mul %A[i], %B[scale] \n\t"
" add %B[result], r0 \n\t"
" adc %C[result], r1 \n\t"
" adc %D[result], %[zero] \n\t"
// cleanup r1
" clr r1 \n\t"
: [result] "+r" (result)
: [i] "r" (i),
[scale] "r" (scale),
[zero] "r" (zero)
: "r0", "r1"
);
asm volatile(
// result.A-D += i.A-B
" add %A[result], %A[i] \n\t"
" adc %B[result], %B[i] \n\t"
" adc %C[result], %[zero] \n\t"
" adc %D[result], %[zero] \n\t"
: [result] "+r" (result)
: [i] "r" (i),
[zero] "r" (zero)
);
result = result >> 16;
return result;
#else
uint32_t result;
asm volatile(
// result.A-B = i.A x scale.A
" mul %A[i], %A[scale] \n\t"
// save results...
// basic idea:
//" mov %A[result], r0 \n\t"
//" mov %B[result], r1 \n\t"
// which can be written as...
" movw %A[result], r0 \n\t"
// We actually don't need to do anything with r0,
// as result.A is never used again here, so we
// could just move the high byte, but movw is
// one clock cycle, just like mov, so might as
@@ -527,6 +637,7 @@ LIB8STATIC uint16_t scale16( uint16_t i, fract16 scale )
result = result >> 16;
return result;
#endif
#else
#error "No implementation for scale16 available."
#endif

0 comments on commit 94e46e3

Please sign in to comment.