Skip to content

Commit

Permalink
Force in-order execution of all DMA calls and bumped optimization level
Browse files Browse the repository at this point in the history
up to -O2.
Removed dead code in interrupt handler.
Modified constructor calling.
  • Loading branch information
Shaun Taylor committed Mar 1, 2012
1 parent 0010d9a commit bee4744
Show file tree
Hide file tree
Showing 13 changed files with 47 additions and 14 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
ROOTDIR = $(N64_INST)
CFLAGS = -std=gnu99 -O1 -G0 -Wall -Werror -mtune=vr4300 -march=vr4300 -I$(CURDIR)/include -I$(ROOTDIR)/include -I$(ROOTDIR)/mips64-elf/include
CFLAGS = -std=gnu99 -O2 -G0 -Wall -Werror -mtune=vr4300 -march=vr4300 -I$(CURDIR)/include -I$(ROOTDIR)/include -I$(ROOTDIR)/mips64-elf/include
ASFLAGS = -mtune=vr4300 -march=vr4300
N64PREFIX = $(N64_INST)/bin/mips64-elf-
INSTALLDIR = $(N64_INST)
Expand Down
2 changes: 1 addition & 1 deletion examples/vtest/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ N64TOOL = $(ROOTDIR)/bin/n64tool
HEADERNAME = header
LINK_FLAGS = -G0 -L$(ROOTDIR)/lib -L$(ROOTDIR)/mips64-elf/lib -ldragon -lc -lm -ldragonsys -Tn64ld.x
PROG_NAME = VidResTest
CFLAGS = -std=gnu99 -march=vr4300 -mtune=vr4300 -O1 -G0 -Wall -Werror -I$(ROOTDIR)/include -I$(ROOTDIR)/mips64-elf/include
CFLAGS = -std=gnu99 -march=vr4300 -mtune=vr4300 -G0 -Wall -Werror -I$(ROOTDIR)/include -I$(ROOTDIR)/mips64-elf/include
ASFLAGS = -mtune=vr4300 -march=vr4300
CC = $(GCCN64PREFIX)gcc
AS = $(GCCN64PREFIX)as
Expand Down
9 changes: 9 additions & 0 deletions include/n64sys.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,15 @@
*/
#define CachedAddr(_addr) ((void *)(((unsigned long)(_addr))&~0x20000000))

/**
 * @brief Memory barrier to ensure in-order execution
 *
 * This is a compiler-level barrier: the empty asm statement with a "memory"
 * clobber emits no instructions, but the compiler may not move memory
 * accesses across it or keep memory values cached in registers over it.
 * It is placed between hardware register accesses so that DMA setup is
 * done in the intended order when the library is built at -O2.
 *
 * Spelled with __asm__ / __volatile__ so that this header can also be used
 * from code compiled in a strict ISO mode (e.g. -std=c99), where the plain
 * asm keyword is not available.
 */
#define MEMORY_BARRIER() __asm__ __volatile__ ("" : : : "memory")

#ifdef __cplusplus
extern "C" {
#endif
Expand Down
4 changes: 2 additions & 2 deletions include/regsinternal.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
*/
typedef struct AI_regs_s {
/** @brief Pointer to uncached memory buffer of samples to play */
void * address;
volatile void * address;
/** @brief Size in bytes of the buffer to be played. Should be
* number of stereo samples * 2 * sizeof( uint16_t )
*/
Expand Down Expand Up @@ -106,7 +106,7 @@ typedef struct VI_regs_s {
*/
typedef struct PI_regs_s {
/** @brief Uncached address in RAM where data should be found */
void * ram_address;
volatile void * ram_address;
/** @brief Address of data on peripheral */
uint32_t pi_address;
/** @brief How much data to read from RAM into the peripheral */
Expand Down
3 changes: 2 additions & 1 deletion n64ld_cpp.x
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,9 @@ SECTIONS {

.ctors : {
. = ALIGN(8);
__CTOR_LIST_SIZE__ = .;
LONG((__CTOR_END__ - __CTOR_LIST__) / 4 - 1)
__CTOR_LIST__ = .;
LONG((__CTOR_END__ - __CTOR_LIST__) / 4 - 2)
*(.ctors)
LONG(0)
__CTOR_END__ = .;
Expand Down
3 changes: 2 additions & 1 deletion n64ld_exp_cpp.x
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,9 @@ SECTIONS {

.ctors : {
. = ALIGN(8);
__CTOR_LIST_SIZE__ = .;
LONG((__CTOR_END__ - __CTOR_LIST__) / 4 - 1)
__CTOR_LIST__ = .;
LONG((__CTOR_END__ - __CTOR_LIST__) / 4 - 2)
*(.ctors)
LONG(0)
__CTOR_END__ = .;
Expand Down
3 changes: 3 additions & 0 deletions src/audio.c
Original file line number Diff line number Diff line change
Expand Up @@ -159,10 +159,13 @@ static void audio_callback()
now_playing = next;

AI_regs->address = UncachedAddr( buffers[now_playing] );
MEMORY_BARRIER();
AI_regs->length = (_buf_size * 2 * 2 ) & ( ~7 );
MEMORY_BARRIER();

/* Start DMA */
AI_regs->control = 1;
MEMORY_BARRIER();
}

/* Safe to enable interrupts here */
Expand Down
4 changes: 4 additions & 0 deletions src/controller.c
Original file line number Diff line number Diff line change
Expand Up @@ -111,14 +111,18 @@ static void __controller_exec_PIF( void *inblock, void *outblock )
__SI_DMA_wait();

SI_regs->DRAM_addr = inblock_temp; // only cares about 23:0
MEMORY_BARRIER();
SI_regs->PIF_addr_write = PIF_RAM; // is it really ever anything else?
MEMORY_BARRIER();

__SI_DMA_wait();

data_cache_hit_writeback_invalidate(outblock_temp, 64);

SI_regs->DRAM_addr = outblock_temp;
MEMORY_BARRIER();
SI_regs->PIF_addr_read = PIF_RAM;
MEMORY_BARRIER();

__SI_DMA_wait();

Expand Down
2 changes: 2 additions & 0 deletions src/display.c
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ static void __write_registers( uint32_t const * const registers )
if( i == 4 ) { continue; }

reg_base[i] = registers[i];
MEMORY_BARRIER();
}
}

Expand All @@ -192,6 +193,7 @@ static void __write_dram_register( void const * const dram_val )
uint32_t *reg_base = (uint32_t *)REGISTER_BASE;

reg_base[1] = (uint32_t)dram_val;
MEMORY_BARRIER();
}

/**
Expand Down
12 changes: 12 additions & 0 deletions src/dma.c
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,13 @@ void dma_read(void * ram_address, unsigned long pi_address, unsigned long len)
disable_interrupts();

while (dma_busy()) ;
MEMORY_BARRIER();
PI_regs->ram_address = ram_address;
MEMORY_BARRIER();
PI_regs->pi_address = (pi_address | 0x10000000) & 0x1FFFFFFF;
MEMORY_BARRIER();
PI_regs->write_length = len-1;
MEMORY_BARRIER();
while (dma_busy()) ;

enable_interrupts();
Expand All @@ -94,9 +98,13 @@ void dma_write(void * ram_address, unsigned long pi_address, unsigned long len)
disable_interrupts();

while (dma_busy()) ;
MEMORY_BARRIER();
PI_regs->ram_address = ram_address;
MEMORY_BARRIER();
PI_regs->pi_address = (pi_address | 0x10000000) & 0x1FFFFFFF;
MEMORY_BARRIER();
PI_regs->read_length = len-1;
MEMORY_BARRIER();
while (dma_busy()) ;

enable_interrupts();
Expand All @@ -119,7 +127,9 @@ uint32_t io_read(uint32_t pi_address)

/* Wait until there isn't a DMA transfer and grab a word */
while (dma_busy()) ;
MEMORY_BARRIER();
retval = *uncached_address;
MEMORY_BARRIER();

enable_interrupts();

Expand All @@ -141,7 +151,9 @@ void io_write(uint32_t pi_address, uint32_t data)
disable_interrupts();

while (dma_busy()) ;
MEMORY_BARRIER();
*uncached_address = data;
MEMORY_BARRIER();

enable_interrupts();
}
Expand Down
7 changes: 5 additions & 2 deletions src/do_ctors.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
* @brief C++ constructor handling
* @ingroup system
*/
#include <stdint.h>

/**
* @addtogroup system
Expand All @@ -12,6 +13,8 @@
/** @brief Function pointer */
typedef void (*func_ptr)(void);

/** @brief Number of constructors to call, read from the word located at the __CTOR_LIST_SIZE__ symbol */
extern uint32_t __CTOR_LIST_SIZE__;
/** @brief Pointer to the beginning of the constructor list */
extern func_ptr __CTOR_LIST__[];
/** @brief Pointer to the end of the constructor list */
Expand All @@ -22,8 +25,8 @@ extern func_ptr __CTOR_END__[];
*/
void __do_global_ctors()
{
unsigned int tot_constructors = *(unsigned int*)(__CTOR_LIST__);
for (void (**f)(void) = (void (**)(void))(__CTOR_LIST__ + 1); tot_constructors > 0; tot_constructors--, f++)
unsigned int tot_constructors = __CTOR_LIST_SIZE__;
for (void (**f)(void) = (void (**)(void))(__CTOR_LIST__); tot_constructors > 0; tot_constructors--, f++)
(**f)();
}

Expand Down
6 changes: 0 additions & 6 deletions src/inthandler.S
Original file line number Diff line number Diff line change
Expand Up @@ -106,12 +106,6 @@ inthandler:
sdc1 $f31,saveFR31

la sp,(exception_stack+65*1024-8)
//lui t0,0xa000
//or sp,t0
//or gp,t0

//j notcount
//nop

mfc0 k1,C0_CAUSE
andi $30,k1,0xff
Expand Down
4 changes: 4 additions & 0 deletions src/rdp.c
Original file line number Diff line number Diff line change
Expand Up @@ -224,15 +224,19 @@ static void __rdp_ringbuffer_send( void )

/* Clear XBUS/Flush/Freeze */
((uint32_t *)0xA4100000)[3] = 0x15;
MEMORY_BARRIER();

/* Don't saturate the RDP command buffer. Another command could have been written
* since we checked before disabling interrupts, but it is unlikely, so we probably
* won't stall in this critical section long. */
while( (((volatile uint32_t *)0xA4100000)[3] & 0x600) ) ;

/* Send start and end of buffer location to kick off the command transfer */
MEMORY_BARRIER();
((volatile uint32_t *)0xA4100000)[0] = ((uint32_t)rdp_ringbuffer | 0xA0000000) + rdp_start;
MEMORY_BARRIER();
((volatile uint32_t *)0xA4100000)[1] = ((uint32_t)rdp_ringbuffer | 0xA0000000) + rdp_end;
MEMORY_BARRIER();

/* We are good now */
enable_interrupts();
Expand Down

0 comments on commit bee4744

Please sign in to comment.