<a href="https://colab.research.google.com/github/Gbrasce/JupyterHPCUCA/blob/main/Copia_de_ejemplo1Final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Trabajo con CPU.

In [None]:
import numpy as np

import time

# CPU benchmark: time the dense product of two N x N float64 matrices.
# N is 10000 (an earlier comment said 1000 x 1000, which did not match the
# size actually used). Each operand occupies N * N * 8 bytes = 800 MB.
N = 10000

# Seed NumPy's global RNG so x and y are identical on every run.
np.random.seed(42)

# Integer values drawn from [0, 256) and stored as float64.
x = np.random.randint(0, 256, size=(N, N)).astype("float64")
y = np.random.randint(0, 256, size=(N, N)).astype("float64")

# Time only the multiplication, not the matrix generation above.
# perf_counter() is a monotonic high-resolution clock intended for measuring
# intervals; time.time() follows the wall clock, which can be adjusted mid-run.
tic = time.perf_counter()
z = np.matmul(x, y)
toc = time.perf_counter()

time_taken = toc - tic  # elapsed time in seconds

print("Time taken on CPU (in ms) = {}".format(time_taken*1000))
print("Tiempo en segundos = {}".format(time_taken))

Time taken on CPU (in ms) = 52165.71283340454
Tiempo en segundos = 52.16571283340454


# Trabajo con GPU

In [None]:
!pip install pycuda
!pip install scikit-cuda
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import numpy as np
import skcuda.linalg as linalg
import time
# GPU benchmark: time the dense product of two N x N float64 matrices using
# scikit-cuda on top of PyCUDA.
N = 10000

# Initialise the scikit-cuda linalg module before using its routines.
linalg.init()

# Re-seed with the same value as the CPU benchmark (42) so both benchmarks
# multiply identical matrices and the run is reproducible.
np.random.seed(42)

# Integer values drawn from [0, 256) and stored as float64.
x = np.random.randint(0, 256, size=(N, N)).astype("float64")
y = np.random.randint(0, 256, size=(N, N)).astype("float64")

# Copy the operands to GPU memory; the transfer is deliberately left out of
# the timed region so only the multiplication is measured.
x_gpu = gpuarray.to_gpu(x)
y_gpu = gpuarray.to_gpu(y)

# Drain any pending GPU work so the clock starts from an idle device.
pycuda.autoinit.context.synchronize()
tic = time.perf_counter()

# Perform the multiplication on the GPU.
z_gpu = linalg.mdot(x_gpu, y_gpu)

# GPU work may be queued asynchronously with respect to the host, so wait for
# the device to finish before stopping the clock; otherwise the measurement
# could cover only part of the computation.
pycuda.autoinit.context.synchronize()
toc = time.perf_counter()

time_taken = toc - tic  # elapsed time in seconds

print("Time taken on a GPU (in ms) = {}".format(time_taken*1000))
print("Tiempo en segundos = {}".format(time_taken))

Time taken on a GPU (in ms) = 11987.776756286621
Tiempo en segundos = 11.987776756286621


In [None]:
!pip install pyopencl



In [None]:
#!/usr/bin/env python

import numpy as np
import pyopencl as cl

# OpenCL demo: add two random float32 vectors on an OpenCL device and verify
# the result against NumPy on the host.

# Host inputs: two vectors of 50000 random float32 values (a_np drawn first).
a_np, b_np = (np.random.rand(50000).astype(np.float32) for _ in range(2))

# Let PyOpenCL choose a context. To force a GPU instead, build the context
# from cl.get_platforms()[0].get_devices(device_type=cl.device_type.GPU)
# and pass that device list to cl.Context(devices=...).
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

# Device buffers: the inputs are read-only and initialised from the host
# arrays; the output is write-only and sized like one input vector.
mf = cl.mem_flags
input_flags = mf.READ_ONLY | mf.COPY_HOST_PTR
a_g = cl.Buffer(ctx, input_flags, hostbuf=a_np)
b_g = cl.Buffer(ctx, input_flags, hostbuf=b_np)
res_g = cl.Buffer(ctx, mf.WRITE_ONLY, a_np.nbytes)

# Kernel source: each work-item adds the pair of elements at its global id.
kernel_src = """
__kernel void sum( __global const float *a_g, __global const float *b_g, __global float *res_g)
{
  int gid = get_global_id(0);
  res_g[gid] = a_g[gid] + b_g[gid];
}
"""
prg = cl.Program(ctx, kernel_src).build()

# Launch with a global size equal to the vector shape (one work-item per
# element); passing None lets the implementation pick the local size.
prg.sum(queue, a_np.shape, None, a_g, b_g, res_g)

# Copy the device result into a host array shaped like the inputs.
res_np = np.empty_like(a_np)
cl.enqueue_copy(queue, res_np, res_g)

print(">CTRL1: es a, b y res")
for vec in (a_np, b_np, res_np):
    print(vec)

# Reference sum computed on the host with NumPy.
expected = a_np + b_np
print(">CTRL2: es a + b")
print(expected)

# Element-wise difference between the device result and the reference.
residual = res_np - expected
print(">CTRL3: (res - (a + b))")
print(residual)
print(np.linalg.norm(residual))
assert np.allclose(res_np, expected)
print("\n>Fin: El programa calcula correctamente.")

>CTRL1: es a, b y res
[0.25342506 0.6360239  0.4378942  ... 0.730203   0.08181263 0.3643122 ]
[0.75097644 0.87491316 0.62254435 ... 0.806247   0.9499515  0.02636318]
[1.0044014  1.510937   1.0604385  ... 1.5364499  1.0317641  0.39067537]
>CTRL2: es a + b
[1.0044014  1.510937   1.0604385  ... 1.5364499  1.0317641  0.39067537]
>CTRL3: (res - (a + b))
[0. 0. 0. ... 0. 0. 0.]
0.0

>Fin: El programa calcula correctamente.
