Permalink
Browse files

12% speedup in noise on AVR (net with previous commit) by using inlin…

…e avg15 on AVRs with MUL. Code size goes up 8 bytes for 16-bit, 3-D noise.
  • Loading branch information...
kriegsman committed Jan 7, 2016
1 parent 3a8bb4c commit 7edd233f79e0ed2b6af0ae6251f212680b31a6b6
Showing with 24 additions and 0 deletions.
  1. +24 −0 noise.cpp
View
@@ -24,8 +24,32 @@ FL_PROGMEM static uint8_t const p[] = { 151,160,137,91,90,15,
#if FASTLED_NOISE_ALLOW_AVERAGE_TO_OVERFLOW == 1
#define AVG15(U,V) (((U)+(V)) >> 1)
#else
+// See if we should use the inlined avg15 for AVR with MUL instruction
+#if defined(__AVR__) && (LIB8_ATTINY == 0)
+#define AVG15(U,V) (avg15_inline_avr_mul((U),(V)))
+// inlined copy of avg15 for AVR with MUL instruction; cloned from math8.h
+// Forcing this inline in the 3-D 16bit noise produces a 12% speedup overall,
+// at a cost of just +8 bytes of net code size.
+// Returns the average of two signed 16-bit values, computed as
+// (i >> 1) + (j >> 1) + (i & 1): each input is halved with an arithmetic
+// shift before the add, so the intermediate sum always fits in 16 bits.
+// The low bit of i is added back through the carry flag; the low bit of j
+// is discarded.
+//
+// Both operands are declared read-write ("+a") because the asm shifts j in
+// place as well as i. Declaring j input-only would let the compiler assume
+// its register still holds the un-shifted value after the asm, which GCC's
+// extended-asm rules do not permit. j is a by-value parameter, so the
+// caller's variable is unaffected.
+static inline int16_t __attribute__((always_inline)) avg15_inline_avr_mul( int16_t i, int16_t j)
+{
+ asm volatile(
+ /* first divide j by 2, throwing away lowest bit */
+ "asr %B[j] \n\t"
+ "ror %A[j] \n\t"
+ /* now divide i by 2, with lowest bit going into C */
+ "asr %B[i] \n\t"
+ "ror %A[i] \n\t"
+ /* add j + C to i */
+ "adc %A[i], %A[j] \n\t"
+ "adc %B[i], %B[j] \n\t"
+ : [i] "+a" (i),
+   [j] "+a" (j)   /* modified by the asr/ror pair above */
+ : /* no input-only operands */
+ : "cc" );        /* shifts and adc rewrite the status flags */
+ return i;
+}
+#else
#define AVG15(U,V) (avg15((U),(V)))
#endif
+#endif
//
// #define FADE_12

0 comments on commit 7edd233

Please sign in to comment.