Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Working SPI ISR optimizations with ~30% performance improvement #10

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# ShiftPWM-Redux
Arduino Library for software PWM with shift registers

This branch optimizes the SPI sending code.

The new code is about 30% faster than the original. You can use the extra time to either have more foreground load, or to increase the PWM frequency.


141 changes: 110 additions & 31 deletions ShiftPWM.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,16 +70,99 @@ extern const bool ShiftPWM_balanceLoad;
#endif

// The macro below uses 3 instructions per pin to generate the byte to transfer with SPI
// Retreive duty cycle setting from memory (ldd, 2 clockcycles)
// Retrieve duty cycle setting from memory (ldd, 2 clockcycles)
// Compare with the counter (cp, 1 clockcycle) --> result is stored in carry
// Use the rotate over carry right to shift the compare result into the byte. (1 clockcycle).
#define add_one_pin_to_byte(sendbyte, counter, ledPtr) \
{ \
unsigned char pwmval=*ledPtr; \
asm volatile ("cp %0, %1" : /* No outputs */ : "r" (counter), "r" (pwmval): ); \
asm volatile ("ror %0" : "+r" (sendbyte) : "r" (sendbyte) : ); \
}

// len is the number of 8-bit bytes to send out the SPI
// buf points to the last byte of a buffer of values to compare and send. The last byte in the buffer is sent first.
// counter is threshold. Each byte in the buff is compared to count and if it is greater, then a 1 bit is sent.

static __attribute__ ((always_inline)) void send_spi_bytes( unsigned len, unsigned char counter, const unsigned char *buf, const unsigned char imask, const unsigned char step )
{
register unsigned char sendchar;

asm volatile (

"LOOP_LEN_%=: \n\t"

// Shift PWM Bit #0
"ld __tmp_reg__, -%a[buf] \n\t"
"cp %[counter], __tmp_reg__ \n\t"
"ror %[sendchar] \r\n"

// Shift PWM Bit #1
"ld __tmp_reg__, -%a[buf] \n\t"
"cp %[counter], __tmp_reg__ \n\t"
"ror %[sendchar] \r\n"

// Shift PWM Bit #2
"ld __tmp_reg__, -%a[buf] \n\t"
"cp %[counter], __tmp_reg__ \n\t"
"ror %[sendchar] \r\n"

// Shift PWM Bit #3
"ld __tmp_reg__, -%a[buf] \n\t"
"cp %[counter], __tmp_reg__ \n\t"
"ror %[sendchar] \r\n"

// Shift PWM Bit #4
"ld __tmp_reg__, -%a[buf] \n\t"
"cp %[counter], __tmp_reg__ \n\t"
"ror %[sendchar] \r\n"

// Shift PWM Bit #5
"ld __tmp_reg__, -%a[buf] \n\t"
"cp %[counter], __tmp_reg__ \n\t"
"ror %[sendchar] \r\n"

// Shift PWM Bit #6
"ld __tmp_reg__, -%a[buf] \n\t"
"cp %[counter], __tmp_reg__ \n\t"
"ror %[sendchar] \r\n"

// Shift PWM Bit #7
"ld __tmp_reg__, -%a[buf] \n\t"
"cp %[counter], __tmp_reg__ \n\t"
"ror %[sendchar] \n\t"

// Potentially invert bits
"eor %[sendchar],%[imask] \n\t"

// Check that previous send completed
"WAIT_SPI_%=: \n\t"
"in __tmp_reg__,%[spsr] \n\t"
"sbrs __tmp_reg__,%[spif] \n\t"
"rjmp WAIT_SPI_%= \n\t"

// Send new byte
"out %[spdr], %[sendchar] \n\t"

// repeat until all shift registers are filled
"sbiw %[len], 1 \n\t"
"brne LOOP_LEN_%= \n\t"

: // Outputs:
[sendchar] "=&r" (sendchar) // working register

: // Inputs:
[buf] "e" (buf), // pointer to buffer
[len] "r" (len), // length of buffer
[counter] "r" (counter), // current threshold
[imask] "r" (imask), // XORed to final byte before sent to potentially invert bits
[step] "r" (step), // increment counter by this much for each byte sent for load balancing

// Constants
[spdr] "I" (_SFR_IO_ADDR(SPDR)), // SPI data register
[spsr] "I" (_SFR_IO_ADDR(SPSR)), // SPI status register
[spif] "I" (SPIF) // SPI interrupt flag (send complete) (should really be a constraint for 0-7 value)

: // Clobbers
"cc" // special name that indicates that flags may have been clobbered

);

}
// The inline function below uses normal output pins to send one bit to the SPI port.
// This function is used in the noSPI mode and is useful if you need the SPI port for something else.
// It is a lot 2.5x slower than the SPI version.
Expand All @@ -102,7 +185,7 @@ static inline void ShiftPWM_handleInterrupt(void){
// Look up which bit of which output register corresponds to the pin.
// This should be constant, so the compiler can optimize this code away and use sbi and cbi instructions
// The compiler only knows this if this function is compiled in the same file as the pin setting.
// That is the reason the full funcion is in the header file, instead of only the prototype.
// That is the reason the full function is in the header file, instead of only the prototype.
// If this function is defined in cpp files of the library, it is compiled seperately from the main file.
// The compiler does not recognize the pins/ports as constant and sbi and cbi instructions cannot be used.

Expand All @@ -128,29 +211,24 @@ static inline void ShiftPWM_handleInterrupt(void){

#ifndef SHIFTPWM_NOSPI
//Use SPI to send out all bits
SPDR = 0; // write bogus bit to the SPI, because in the loop there is a receive before send.
for(unsigned char i = ShiftPWM.m_amountOfRegisters; i>0;--i){ // do a whole shift register at once. This unrolls the loop for extra speed
unsigned char sendbyte; // no need to initialize, all bits are replaced
if(ShiftPWM_balanceLoad){
counter +=8; // distribute the load by using a shifted counter per shift register
}
add_one_pin_to_byte(sendbyte, counter, --ledPtr);
add_one_pin_to_byte(sendbyte, counter, --ledPtr);
add_one_pin_to_byte(sendbyte, counter, --ledPtr);
add_one_pin_to_byte(sendbyte, counter, --ledPtr);

add_one_pin_to_byte(sendbyte, counter, --ledPtr);
add_one_pin_to_byte(sendbyte, counter, --ledPtr);
add_one_pin_to_byte(sendbyte, counter, --ledPtr);
add_one_pin_to_byte(sendbyte, counter, --ledPtr);

while (!(SPSR & _BV(SPIF))); // wait for last send to finish and retreive answer. Retreive must be done, otherwise the SPI will not work.
if(ShiftPWM_invertOutputs){
sendbyte = ~sendbyte; // Invert the byte if needed.
}
SPDR = sendbyte; // Send the byte to the SPI
}

// write bogus byte to the SPI because the SPI hardware only signals "transmit complete" rather than "transmitter idle".
// This byte will harmlessly be shifted off the edge of the final shift register
SPDR = 0;

// If we are inverting outputs, then XORing the output byte with 0xff will flip all bits. XORing with 0x00 does nothing (except waste a cycle).
// TODO: Invert the bits when they are put into the buffer so we only do it once rather than on every INT
const unsigned char imask = ShiftPWM_invertOutputs? 0xff: 0x00;

// This value gets added to the counter after each byte is sent to avoid having all loads turning on and off at the same time.
// TODO: Is there any use case where you would *not* want to balance the output? If not, get rid of the option and calculate the best step value to most evenly distribute load over all outputs
const unsigned char step = ShiftPWM_balanceLoad ? 8 : 0;

send_spi_bytes(ShiftPWM.m_amountOfRegisters,counter,ledPtr, imask, step);

while (!(SPSR & _BV(SPIF))); // wait for last send to complete.


#else
//Use port manipulation to send out all bits
for(unsigned char i = ShiftPWM.m_amountOfRegisters; i>0;--i){ // do one shift register at a time. This unrolls the loop for extra speed
Expand All @@ -170,7 +248,8 @@ static inline void ShiftPWM_handleInterrupt(void){

// Write shift register latch clock high
bitSet(*latchPort, latchBit);


// TODO: spin backwards to save one cycle on compare
if(ShiftPWM.m_counter<ShiftPWM.m_maxBrightness){
ShiftPWM.m_counter++; // Increase the counter
}
Expand Down