# PyCUDA Histogram

In [35]:
import numpy as np
import pycuda.autoinit
import pycuda.driver as cuda
from pycuda.compiler import SourceModule
from pycuda.reduction import ReductionKernel

In [36]:
# Problem setup: a small random integer sample and the NumPy reference
# histogram that the GPU result is meant to be compared against.
SEED = 0          # fixed seed so Restart & Run All reproduces the same sample
arr_min = 2.0     # lower edge of the histogram range
arr_max = 10.0    # upper edge of the histogram range
arr_size = 20     # number of samples
bins = 5          # number of equal-width bins

# randint draws integers from the half-open interval [low, high), so the
# bounds are cast to int explicitly instead of relying on implicit coercion
# of the float range limits. A local RandomState keeps the global NumPy RNG
# untouched.
rng = np.random.RandomState(SEED)
arr = rng.randint(int(arr_min), int(arr_max), size=arr_size, dtype=np.int32)

# Reference result: (counts, bin_edges) over [arr_min, arr_max].
numpy_hist = np.histogram(arr, bins, range=(arr_min, arr_max))

In [37]:
# Show the NumPy reference: np.histogram returns a (counts, bin_edges) tuple.
numpy_hist

(array([2, 4, 1, 8, 5], dtype=int64),
 array([  2. ,   3.6,   5.2,   6.8,   8.4,  10. ]))

In [38]:
# Compile the CUDA histogram kernel.
#
# Launch layout expected by the kernel: the x-dimension of the grid/block
# enumerates input elements (i) and the y-dimension enumerates bins (j).
# Each (i, j) thread tests whether element i falls into bin j and, if so,
# increments counts[j].
#
# Kernel arguments (all device pointers; scalars are passed as 1-element arrays):
#   arr    - input samples, read as float32
#   counts - per-bin counters, at least bins[0] entries; the kernel only
#            increments, so the caller must zero them before the launch
#   size   - number of elements in arr
#   bins   - number of bins
#   min    - lower edge of the histogram range
#   max    - upper edge of the histogram range
#
# Binning follows np.histogram: bin j covers [edge(j), edge(j+1)), except the
# last bin, which also includes the upper edge `max`.
mod = SourceModule('''
__global__ void hist(float* arr,int* counts, int* size, int* bins,float* min,float* max)
{
    // i -> arr elements; j -> bins;
    size_t i = blockIdx.x*blockDim.x + threadIdx.x;
    size_t j = blockIdx.y*blockDim.y + threadIdx.y;

    // Reject non-positive sizes before they are compared against unsigned
    // indices (a negative int would otherwise convert to a huge size_t).
    if (size[0] <= 0 || bins[0] <= 0)
        return;

    const size_t n     = (size_t)size[0];
    const size_t nbins = (size_t)bins[0];
    if (i >= n || j >= nbins)
        return;

    const float x  = arr[i];
    const float lo = min[0];
    const float hi = max[0];

    // Zero-based bin edges. j is unsigned, so the lower edge is built from j
    // itself (never j-1, which would wrap around for j == 0).
    const float lower = lo + j * (hi - lo) / nbins;
    const bool  last  = (j + 1 == nbins);

    bool in_bin;
    if (last)
    {
        // Last bin is closed on the right so that x == max is counted.
        in_bin = (x >= lower) && (x <= hi);
    }
    else
    {
        const float upper = lo + (j + 1) * (hi - lo) / nbins;
        in_bin = (x >= lower) && (x < upper);
    }

    if (in_bin)
    {
        // Many threads (one per element) may hit the same bin concurrently;
        // a plain read-modify-write would lose increments.
        atomicAdd(&counts[j], 1);
    }
}
''')

func = mod.get_function("hist")

In [39]:
# Host-side buffers for the kernel launch. The kernel takes every argument
# as a pointer, so each scalar is wrapped in a one-element array whose dtype
# matches the corresponding kernel parameter (int* -> int32, float* -> float32).

# Output counters, zero-initialised (bins + 1 entries).
count = np.zeros(bins + 1, dtype=np.int32)

# Integer scalars: number of samples and number of bins.
size_arr = np.array([arr_size], dtype=np.int32)
bins_arr = np.array([bins], dtype=np.int32)

# Float scalars: lower and upper edge of the histogram range.
min_arr = np.array([arr_min], dtype=np.float32)
max_arr = np.array([arr_max], dtype=np.float32)

In [40]:
# Launch the histogram kernel.
#
# `arr` was created with dtype=np.int32, but the kernel declares `float* arr`;
# handing over the int32 buffer would make the GPU reinterpret the integer
# bit patterns as floats. Pass a float32 copy instead.
arr_f32 = arr.astype(np.float32)

# The kernel only increments `count` (InOut), so reset it first; otherwise
# re-running this cell would accumulate on top of the previous result.
count.fill(0)

# x-dimension: one block per input element.
# y-dimension: bins, BINS_PER_BLOCK threads per block and as many blocks as
# needed to cover all bins (a single block of 8 would ignore bins >= 8).
BINS_PER_BLOCK = 8
bin_blocks = (bins + BINS_PER_BLOCK - 1) // BINS_PER_BLOCK

func(
    cuda.In(arr_f32),
    cuda.InOut(count),
    cuda.In(size_arr),
    cuda.In(bins_arr),
    cuda.In(min_arr),
    cuda.In(max_arr),
    block=(1, BINS_PER_BLOCK, 1),
    grid=(arr_size, bin_blocks, 1),
)

In [41]:
#count