Skip to content

Commit

Permalink
Merge pull request #65 from davidweichiang/reduction
Browse files Browse the repository at this point in the history
round reduction size up to nearest power of two to avoid overloading cache
  • Loading branch information
abergeron committed Aug 5, 2015
2 parents 172cf4a + 47d2551 commit da08a6b
Showing 1 changed file with 8 additions and 6 deletions.
14 changes: 8 additions & 6 deletions pygpu/reduction.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@
from .tools import ArrayArg, check_args, prod, lfu_cache
from .elemwise import parse_c_args, massage_op

def _ceil_log2(x):
# nearest power of 2 (going up)
if x != 0:
return int(math.ceil(math.log(x, 2)))
else:
return 0

basic_kernel = Template("""
${preamble}
Expand Down Expand Up @@ -172,11 +178,7 @@ def __init__(self, context, dtype_out, neutral, reduce_expr, redux,

def _find_kernel_ls(self, tmpl, max_ls, *tmpl_args):
local_size = min(self.init_local_size, max_ls)
# nearest power of 2 (going up)
if local_size != 0:
count_lim = int(math.ceil(math.log(local_size, 2)))
else:
count_lim = 0
count_lim = _ceil_log2(local_size)
local_size = 2**count_lim
loop_count = 0
while loop_count <= count_lim:
Expand Down Expand Up @@ -248,7 +250,7 @@ def __call__(self, *args, **kwargs):
if self.init_local_size < n:
k, _, _, ls = self._get_basic_kernel(self.init_local_size, nd)
else:
k, _, _, ls = self._get_basic_kernel(n, nd)
k, _, _, ls = self._get_basic_kernel(2**_ceil_log2(n), nd)

kargs = [n, out]
kargs.extend(dims)
Expand Down

0 comments on commit da08a6b

Please sign in to comment.