In [1]:
import cupy as cp 
import numpy as np 
import time

In [2]:
print(f"Cupy version {cp.__version__}")
print(f"Numpy version {np.__version__}")

Cupy version 13.6.0
Numpy version 2.0.2


#### Test 1 - Creating a large array on GPU and CPU and timing how long a simple array operation will take

##### Conclusion - As expected, GPU performs better. The constraint is memory here. Throw in a couple more zeros to the array sizes and GPU will run out of memory. 

In [3]:
a = cp.random.randn(100000000)
b = cp.random.randn(100000000)
cp.cuda.Device().synchronize()
start = time.perf_counter()

c = a + b

cp.cuda.Device().synchronize()
print(f"Cupy - Total time taken for this ops {time.perf_counter()-start}")

Cupy - Total time taken for this ops 0.03169144599996798


In [4]:
##These are all directly CPU operations
x = np.random.randn(100000000)
y = np.random.randn(100000000)

cpu_start = time.perf_counter()
z = x + y
print(f"Total time taken for this ops {time.perf_counter()-cpu_start}")

Total time taken for this ops 0.11027711200040358


In [5]:
##Delete all objects to free up memory for the nezt operation

#CPU objects
del x, y, z

##GPU objects
del a, b , c  ##Apparently this is not simply enough
cp.get_default_memory_pool().free_all_blocks()

In [6]:
### Test 2 - Copying arrays between CPU <-> GPU (vice versa)


In [7]:
a = np.random.rand(1000,1000)

print(a[0][0])

##Copying it to GPU now
x = cp.asarray(a)
x = x +1
print(x[0][0])

##Copy it back to CPU - Note there is no "Movement"  without creating a copy
b = cp.asnumpy(x)
print(b[0][0])


0.23471182654184464
1.2347118265418446
1.2347118265418446


In [8]:
p = cp.dot(b,b)
print(type(p))
print(p.device)

<class 'numpy.ndarray'>
cpu
