<a href="https://colab.research.google.com/github/MariaOmaechevarria/OPENCL/blob/main/Mult_Mat_Memoria_Coallesced.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

NECESARIO PARA EJECUTAR PYOPENCL EN GOOGLE COLAB (APROX. 5 MINUTOS)

In [None]:
!sudo apt update
!sudo apt purge *nvidia* -y
!sudo apt install nvidia-driver-530 -y


!pip install pyopencl
!apt-get install -y pocl-opencl-icd ocl-icd-libopencl1

LIBRERÍAS USADAS Y ACCESO A GOOGLE DRIVE

In [None]:
# OpenCL bindings, array handling and the results table.
import pyopencl as cl
import numpy as np
import pandas as pd
# Colab helper used to expose Google Drive inside the runtime's file system.
from google.colab import drive
# Mount Google Drive at /content/drive; main() writes its CSV under this path.
drive.mount('/content/drive')

KERNEL MEMORY COALESCED

In [None]:
# OpenCL C source for the matrix-multiplication kernel. The kernel name and its
# argument list (int dim, int *A, int *B, int *C) are what the host code binds to.
program_text = """
// Integer matrix product C = A * B for square, row-major dim x dim matrices.
//
// NDRange dimension 1 selects the row of C. NDRange dimension 0 walks the
// columns of that row with a stride of get_global_size(0), so work-items that
// are neighbours in dimension 0 always touch neighbouring elements of B and C.
// Every element of C is written exactly once for any global size, including
// the case where the global size in dimension 0 is smaller than dim.
__kernel void MatrixMul_kernel_coallesced_row(int dim,
                                              __global const int *A,
                                              __global const int *B,
                                              __global int *C)
{
    const int iRow = get_global_id(1);
    const int colStride = get_global_size(0);

    // Rows outside the matrix (a padded NDRange) have nothing to compute.
    if (iRow >= dim)
        return;

    for (int col = get_global_id(0); col < dim; col += colStride)
    {
        // Integer accumulator: the operands and the output are int, so the
        // dot product is kept exact instead of round-tripping through floating point.
        int result = 0;
        for (int i = 0; i < dim; ++i)
        {
            result += A[iRow * dim + i] * B[i * dim + col];
        }
        // The store uses the same column the dot product was computed for.
        C[iRow * dim + col] = result;
    }
}
"""

FUNCIÓN PRINCIPAL QUE REALIZA LA MULTIPLICACIÓN DE DOS MATRICES DE UNA DIMENSIÓN DADA EN UN DEVICE CON UN LOCAL_SIZE DADO

In [None]:
def mult_mat_memoria_coallesced(dim:int,local_size:tuple,device_type,program_text,A,B):
  """Multiply two square int32 matrices on an OpenCL device and time the kernel.

  A new context, command queue and program are created on every call. The
  kernel is launched over a (dim, dim) global range with the given work-group
  size.

  Args:
    dim: Side length of the square matrices; must be positive.
    local_size: Work-group size passed to the kernel launch, e.g. ``(4, 4)``.
    device_type: ``cl.device_type`` value selecting the kind of device to use.
    program_text: OpenCL C source defining the kernel
      ``MatrixMul_kernel_coallesced_row`` with arguments (dim, A, B, C).
    A: Left operand, array-like of shape ``(dim, dim)``.
    B: Right operand, array-like of shape ``(dim, dim)``.

  Returns:
    Tuple ``(exec_time, C)``: the kernel execution time in seconds, computed
    from the event's profiling start/end counters, and the ``(dim, dim)``
    int32 array read back from the device.

  Raises:
    ValueError: If ``dim`` is not positive or ``A``/``B`` are not ``(dim, dim)``.
    RuntimeError: If no platform exposes a device of ``device_type``.
  """
  if dim <= 0:
    raise ValueError(f"dim must be a positive integer, got {dim}")

  # The kernel indexes both operands as flat row-major int32 storage, so make
  # the host copies match that layout exactly before they are uploaded.
  A = np.ascontiguousarray(A, dtype=np.int32)
  B = np.ascontiguousarray(B, dtype=np.int32)
  if A.shape != (dim, dim) or B.shape != (dim, dim):
    raise ValueError(
        f"A and B must both have shape ({dim}, {dim}); got {A.shape} and {B.shape}")

  # Use the first device of the requested type found on any platform; the
  # first platform alone may not expose such a device.
  device = None
  for platform in cl.get_platforms():
    matching = platform.get_devices(device_type=device_type)
    if matching:
      device = matching[0]
      break
  if device is None:
    raise RuntimeError(
        f"No OpenCL device of type {device_type} was found on any platform")

  # Context and profiling-enabled queue for the selected device
  context = cl.Context([device])
  command_queue = cl.CommandQueue(context, device=device, properties=cl.command_queue_properties.PROFILING_ENABLE)

  # Build the program; on failure show the compiler log and re-raise unchanged
  program = cl.Program(context, program_text)
  try:
    program.build()
  except Exception:
    print("Build log:")
    print(program.get_build_info(device, cl.program_build_info.LOG))
    raise

  mult_kernel = cl.Kernel(program, 'MatrixMul_kernel_coallesced_row')

  # Host-side destination for the result
  C = np.zeros((dim, dim), dtype=np.int32)

  # Device buffers: the inputs are copied from the host, the output is only allocated
  bufA = cl.Buffer(context, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=A)
  bufB = cl.Buffer(context, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf=B)
  bufC = cl.Buffer(context, cl.mem_flags.WRITE_ONLY, C.nbytes)

  # Kernel arguments, in the order of the kernel signature
  mult_kernel.set_arg(0, np.int32(dim))
  mult_kernel.set_arg(1, bufA)
  mult_kernel.set_arg(2, bufB)
  mult_kernel.set_arg(3, bufC)

  # Launch over the whole matrix and block until the kernel has finished
  global_size = (dim, dim)
  event = cl.enqueue_nd_range_kernel(command_queue, mult_kernel, global_size, local_size)
  event.wait()

  # The profiling counters are in nanoseconds; convert the difference to seconds
  exec_time = 1e-9 * (event.profile.end - event.profile.start)

  # Copy the result back and wait for the transfer to complete
  cl.enqueue_copy(command_queue, C, bufC).wait()

  return exec_time,C


FUNCIÓN MAIN QUE REALIZA LA MULTIPLICACIÓN DE MATRICES PARA VARIAS DIMENSIONES Y DISTINTOS LOCAL_SIZE (Nº DE WORK-ITEMS EN CADA WORK-GROUP)

In [None]:
def main():
  """Benchmark the matrix-multiplication kernel over work-group and matrix sizes.

  For every square work-group shape (s, s) with s in 1, 2, 4, 8, 16 and every
  power-of-two matrix size from s up to 2048, two random int32 matrices with
  entries in [0, 10) are multiplied on a GPU device and the kernel time in
  seconds is stored in a table.

  Returns:
    DataFrame with one row per work-group shape (labelled "(s/s)") and one
    column per matrix size from 1 to 8192. Combinations that are not run stay
    NaN. The same table is written as CSV to the Google Drive path below.
  """
  device_type=cl.device_type.GPU
  local_sides = [2 ** k for k in range(0, 5)]   # 1, 2, 4, 8, 16
  max_dim = 2048                                # largest matrix size measured

  # Size 1 is listed up front because the (1/1) row measures it; without it
  # the first assignment would append a stray column after 8192.
  columns = [2 ** k for k in range(0, 14)]      # 2^0 to 2^13 (1 to 8192)
  index = [f"({side}/{side})" for side in local_sides]
  results_df = pd.DataFrame(index=index, columns=columns)

  for side in local_sides:
    local_size = (side, side)

    # Start at the work-group side so the local size always divides the global size
    dim = side
    while dim <= max_dim:
      A = np.random.randint(0, 10, size=(dim, dim)).astype(np.int32)
      B = np.random.randint(0, 10, size=(dim, dim)).astype(np.int32)

      exec_time, _ = mult_mat_memoria_coallesced(dim, local_size, device_type, program_text, A, B)
      results_df.loc[f"({side}/{side})", dim] = exec_time

      dim *= 2

  # Save the results.
  # NOTE(review): the file name ends in "_CPU" although device_type is GPU — confirm
  # this is the intended destination before relying on the saved file.
  results_df.to_csv('/content/drive/My Drive/Colab Notebooks/TFG_OPENCL/MULTIPLICACION DE MATRICES/Mult_Mat_Memoria_Coallesced_CPU.csv',index=True)

  return results_df


# Run the full benchmark sweep and keep the returned timing table in the notebook namespace.
results_df=main()


LogicError: Context failed: <unknown error -9999>