diff --git a/common.h b/common.h index fe20834692..cf25fd5b0e 100644 --- a/common.h +++ b/common.h @@ -499,6 +499,8 @@ void blas_set_parameter(void); int blas_get_cpu_number(void); void *blas_memory_alloc (int); void blas_memory_free (void *); +void *blas_memory_alloc_nolock (int); //use malloc without blas_lock +void blas_memory_free_nolock (void *); int get_num_procs (void); diff --git a/driver/others/memory.c b/driver/others/memory.c index 4010ec974c..12172fd80c 100644 --- a/driver/others/memory.c +++ b/driver/others/memory.c @@ -1161,6 +1161,16 @@ void blas_memory_free(void *free_area){ return; } +void *blas_memory_alloc_nolock(int unused) { + void *map_address; + map_address = (void *)malloc(BUFFER_SIZE + FIXED_PAGESIZE); + return map_address; +} + +void blas_memory_free_nolock(void * map_address) { + free(map_address); +} + void blas_shutdown(void){ int pos; diff --git a/interface/gemv.c b/interface/gemv.c index f33973ef3d..6b0aadca0c 100644 --- a/interface/gemv.c +++ b/interface/gemv.c @@ -211,15 +211,24 @@ void CNAME(enum CBLAS_ORDER order, #ifdef MAX_STACK_ALLOC // make it volatile because some gemv implementation (ex: dgemv_n.S) // do not restore all register - volatile int stack_alloc_size = m + n; - if(stack_alloc_size < 128) + volatile int stack_alloc_size = 0; + if (trans == 0) { + stack_alloc_size = m + n; + if(stack_alloc_size < 128) //dgemv_n.S require a 128 bytes buffer stack_alloc_size = 128; - if(stack_alloc_size > MAX_STACK_ALLOC / sizeof(FLOAT)) + + if(stack_alloc_size > MAX_STACK_ALLOC / sizeof(FLOAT)) stack_alloc_size = 0; - FLOAT stack_buffer[stack_alloc_size]; - buffer = stack_alloc_size ? stack_buffer : (FLOAT *)blas_memory_alloc(1); + FLOAT stack_buffer[stack_alloc_size]; + buffer = stack_alloc_size ? stack_buffer : (FLOAT *)blas_memory_alloc_nolock(1); + + }else{ + //for gemv_t, only malloc + buffer = (FLOAT *)blas_memory_alloc_nolock(1); + } #else + //Original OpenBLAS/GotoBLAS codes. buffer = (FLOAT *)blas_memory_alloc(1); #endif @@ -251,10 +260,13 @@ void CNAME(enum CBLAS_ORDER order, #endif #ifdef MAX_STACK_ALLOC - if(!stack_alloc_size) -#endif + if(!stack_alloc_size){ + blas_memory_free_nolock(buffer); + } +#else blas_memory_free(buffer); - +#endif + FUNCTION_PROFILE_END(1, m * n + m + n, 2 * m * n); IDEBUG_END;