In [21]:
import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule

import numpy
a = numpy.random.randn(40,40)

a = a.astype(numpy.float32)

a_gpu = cuda.mem_alloc(a.size * a.dtype.itemsize)

cuda.memcpy_htod(a_gpu, a)

mod = SourceModule("""
    __global__ void doublify(float *a)
    {
      int idx = threadIdx.x + threadIdx.y*4;
      a[idx] *= 2;
    }
    """)

func = mod.get_function("doublify")
func(a_gpu, block=(4,4,1))

a_doubled = numpy.empty_like(a)
cuda.memcpy_dtoh(a_doubled, a_gpu)
print("original array:")
print(a_gpu)
print("doubled with gpuarray:")
print(a_doubled)

# alternate kernel invocation -------------------------------------------------

func(cuda.InOut(a), block=(4, 4, 1))
print("doubled with InOut:")
print(a)

# part 2 ----------------------------------------------------------------------

import pycuda.gpuarray as gpuarray
a_gpu = gpuarray.to_gpu(numpy.random.randn(4,4).astype(numpy.float32))
a_doubled = (2*a_gpu).get()

print("original array:")
print(a_gpu)
print("doubled with gpuarray:")
print(a_doubled)

original array:
<pycuda._driver.DeviceAllocation object at 0x7f2bac8255d0>
doubled with gpuarray:
[[ 2.09220648  3.61553216 -1.36869669 ..., -1.71180713 -1.85094392
   0.68620628]
 [ 1.04094625 -0.31823     0.5310449  ..., -0.44312555 -0.52778625
  -0.38822991]
 [-0.66438276  1.94193256 -0.37097788 ..., -0.20320164 -0.102201
  -0.02553154]
 ..., 
 [ 1.7212913  -0.55390817 -1.32760954 ..., -1.39914811  0.10575841
  -1.68045831]
 [ 2.1160481  -0.66727191  1.112661   ...,  1.42175162  0.34540194
   0.59938377]
 [ 0.30513272 -0.45355964  0.93997079 ..., -0.93283665 -2.15252495
  -0.58889049]]
doubled with InOut:
[[ 2.09220648  3.61553216 -1.36869669 ..., -1.71180713 -1.85094392
   0.68620628]
 [ 1.04094625 -0.31823     0.5310449  ..., -0.44312555 -0.52778625
  -0.38822991]
 [-0.66438276  1.94193256 -0.37097788 ..., -0.20320164 -0.102201
  -0.02553154]
 ..., 
 [ 1.7212913  -0.55390817 -1.32760954 ..., -1.39914811  0.10575841
  -1.68045831]
 [ 2.1160481  -0.66727191  1.112661   ...,  1.42175