<a href="https://colab.research.google.com/github/MariaOmaechevarria/OPENCL/blob/main/Mult_Mat_Memoria_Local.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Environment setup (shell escapes, run once per fresh runtime).
# Refresh the apt package index.
!sudo apt update
# Remove every installed package whose name matches *nvidia* ...
!sudo apt purge *nvidia* -y
# ... and install the nvidia-driver-530 package instead.
!sudo apt install nvidia-driver-530 -y


# Python bindings for OpenCL (version is not pinned).
!pip install pyopencl
# PoCL OpenCL ICD plus the OpenCL ICD loader library; presumably this is what
# provides the CPU device used by the benchmark below -- TODO confirm.
!apt-get install -y pocl-opencl-icd ocl-icd-libopencl1

In [None]:
import pyopencl as cl
import numpy as np
import pandas as pd
from google.colab import drive
# Mount Google Drive at /content/drive; the benchmark CSV is written under this mount point.
drive.mount('/content/drive')

In [None]:
# OpenCL C source of the matrix-multiplication kernel C = A * B (row-major,
# dim x dim, float32) that stages A through work-group local memory.
#
# Local-memory contract: lA must hold get_local_size(0) * get_local_size(1)
# floats, i.e. one float per work-item of the work-group.
program_text="""
    __kernel void MatrixMul_kernel_localA(int dim, __global float *A, __global float *B, __global float *C, __local float *lA) {

        // Global coordinates of the C element computed by this work-item.
        int iCol = get_global_id(0);
        int iRow = get_global_id(1);

        // Position of the work-item inside its work-group.
        int localCol  = get_local_id(0);
        int localRow  = get_local_id(1);
        int tileWidth = get_local_size(0);

        // Every local row owns a private slice of lA. Work-items that share
        // the same local row also share the same iRow, so they can cooperate
        // on that slice without clobbering the data of the other rows.
        __local float *rowTile = lA + localRow * tileWidth;

        float result = 0.0f;

        // Walk over row iRow of A in chunks of tileWidth elements.
        for (int tileStart = 0; tileStart < dim; tileStart += tileWidth) {

            // Cooperative load: each work-item fetches one element of the chunk.
            int k = tileStart + localCol;
            rowTile[localCol] = (k < dim) ? A[iRow*dim + k] : 0.0f;

            // The whole chunk must be loaded before anybody reads it.
            barrier(CLK_LOCAL_MEM_FENCE);

            int remaining = dim - tileStart;
            int tileLen = (remaining < tileWidth) ? remaining : tileWidth;
            for (int j = 0; j < tileLen; ++j) {
                result += rowTile[j] * B[(tileStart + j)*dim + iCol];
            }

            // Nobody may overwrite the chunk while others are still reading it.
            barrier(CLK_LOCAL_MEM_FENCE);
        }

        C[iRow*dim + iCol] = result;
}
"""

In [None]:
def mult_mat_local(dim:int,local_size:tuple,device_type,program_text,A,B):
  """Run the ``MatrixMul_kernel_localA`` OpenCL kernel and time it.

  Args:
    dim: Side length of the square matrices.
    local_size: Work-group size as ``(size_x, size_y)``.
    device_type: ``cl.device_type`` value used to pick the device on the
      first available platform.
    program_text: OpenCL C source that defines ``MatrixMul_kernel_localA``.
    A: Left operand, a ``dim x dim`` matrix. Converted to float32 if needed.
    B: Right operand, a ``dim x dim`` matrix. Converted to float32 if needed.

  Returns:
    ``(execution_time, C)`` where ``execution_time`` is the kernel run time in
    seconds taken from the event profiling counters and ``C`` is the
    ``dim x dim`` float32 result read back from the device.

  Raises:
    ValueError: If ``A`` or ``B`` does not have shape ``(dim, dim)``.
    Exception: Whatever ``program.build()`` raises; the build log is printed
      before the exception is re-raised.
  """
  # The kernel in this notebook declares A and B as ``float`` buffers and
  # cl.Buffer copies raw bytes. Integer inputs would therefore be
  # reinterpreted bit-for-bit as floats, so convert them explicitly.
  A = np.ascontiguousarray(A, dtype=np.float32)
  B = np.ascontiguousarray(B, dtype=np.float32)
  for name, matrix in (("A", A), ("B", B)):
    if matrix.shape != (dim, dim):
      raise ValueError(f"{name} must have shape ({dim}, {dim}), got {matrix.shape}")

  # Platform: the first one reported by the OpenCL runtime.
  platform = cl.get_platforms()[0]

  # Device: the first one of the requested type.
  device = platform.get_devices(device_type=device_type)[0]

  # Context bound to the selected device.
  context = cl.Context([device])

  # Command queue with profiling enabled so the kernel can be timed.
  command_queue = cl.CommandQueue(context, device=device, properties=cl.command_queue_properties.PROFILING_ENABLE)

  # Create and build the program; show the build log if compilation fails.
  program = cl.Program(context, program_text)
  try:
    program.build()
  except Exception:
    print("Build log:")
    print(program.get_build_info(device, cl.program_build_info.LOG))
    raise

  # Kernel object.
  kernel = cl.Kernel(program, 'MatrixMul_kernel_localA')

  # Host-side result matrix.
  C = np.zeros((dim, dim), dtype=np.float32)

  # Device buffers.
  mf = cl.mem_flags
  buffer_A = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=A)
  buffer_B = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=B)
  buffer_C = cl.Buffer(context, mf.WRITE_ONLY, C.nbytes)

  # Kernel arguments. The local scratch buffer holds one float32 per
  # work-item of the work-group.
  local_mem_size = local_size[0] * local_size[1] * np.dtype(np.float32).itemsize
  kernel.set_arg(0, np.int32(dim))
  kernel.set_arg(1, buffer_A)
  kernel.set_arg(2, buffer_B)
  kernel.set_arg(3, buffer_C)
  kernel.set_arg(4, cl.LocalMemory(local_mem_size))

  # One work-item per element of C.
  global_size = (dim, dim)

  # Launch the kernel and wait for it to finish.
  event = cl.enqueue_nd_range_kernel(command_queue, kernel, global_size, local_size)
  event.wait()

  # Profiling counters are converted to seconds with the 1e-9 factor.
  execution_time = (event.profile.end - event.profile.start) * 1e-9

  # Read the result back into C.
  cl.enqueue_copy(command_queue, C, buffer_C).wait()

  return execution_time, C









In [None]:
def main():
  """Benchmark the local-memory kernel on the CPU device and save the timings.

  For every square work-group size 1x1, 2x2, 4x4, 8x8 and 16x16 the matrix
  side is doubled from the work-group side (at least 2) up to 2048 and the
  kernel time returned by ``mult_mat_local`` is stored in a table with one row
  per work-group size and one column per matrix side. The table is written to
  a CSV file on the mounted Drive.

  Returns:
    The ``pandas.DataFrame`` with the timings in seconds. Cells for
    combinations that were not run stay empty (NaN).
  """
  device_type=cl.device_type.CPU

  group_sides = [2 ** k for k in range(0, 5)]   # 1, 2, 4, 8, 16
  columns = [2 ** k for k in range(1, 14)]      # 2^1 .. 2^13 (2 to 8192)
  max_dim = 2048                                 # largest matrix side actually run

  # 2 ** 0 already formats as "(1/1)", so no special case is needed.
  index = [f"({side}/{side})" for side in group_sides]
  results_df = pd.DataFrame(index=index, columns=columns)

  for side in group_sides:
    local_size=(side,side)

    # The table has no column for dim == 1, so start at 2 for the 1x1 group.
    dim=max(side, 2)

    while dim<=max_dim:
      # The kernel works on float buffers, so the inputs must be float32;
      # integer arrays would be reinterpreted bit-for-bit on the device.
      A = np.random.randint(0, 10, size=(dim, dim)).astype(np.float32)
      B = np.random.randint(0, 10, size=(dim, dim)).astype(np.float32)

      exec_time,C=mult_mat_local(dim,local_size,device_type,program_text,A,B)

      # "NP" marks a combination for which no time was reported.
      results_df.loc[f"({side}/{side})", dim] = exec_time if exec_time is not None else "NP"

      dim*=2

      # Drop the host matrices before allocating the next, larger pair.
      del A,B,C

  # Save the results.
  results_df.to_csv('/content/drive/My Drive/Colab Notebooks/TFG_OPENCL/MULTIPLICACION DE MATRICES/Mult_Mat_Memoria_Local_CPU.csv',index=True)

  return results_df


# Run the benchmark; the returned table is kept in results_df for inspection.
results_df=main()


In [None]:
# Sanity check: multiply two small 4x4 matrices on the device and compare the
# result with the NumPy reference product.
dim=4
local_size=(2,2)
device_type=cl.device_type.CPU

# float32 matches the element type of the kernel's buffers.
A = np.array([[7, 2, 0, 2],
              [0, 3, 0, 5],
              [1, 1, 3, 1],
              [8, 9, 1, 7]], dtype=np.float32)

B = np.array([[2, 9, 8, 4],
              [2, 5, 4, 7],
              [6, 7, 1, 3],
              [9, 0, 9, 8]], dtype=np.float32)

# mult_mat_local is the runner defined in this notebook.
exec_time,C=mult_mat_local(dim,local_size,device_type,program_text,A,B)
print(C)

# Host-side reference for comparison.
expected = A @ B
print(expected)
print("Matches NumPy reference:", np.allclose(C, expected))