# In [None]:
import numpy as np
from oneapi import dpl as dp

# Number of elements in each input vector.
n = 1000

# Host-side inputs: a = [0, 1, ..., n-1] and b = a + 1, both float32.
a = np.arange(n, dtype=np.float32)
b = np.arange(n, dtype=np.float32) + 1

# NOTE(review): this script relies on `dp` exposing device_allocator,
# device_memcpy, parallel_for and DEVICE_DEFAULT. None of these can be verified
# from this file -- confirm them against the binding that is actually installed.


def _multiply_at(i):
    """Store the product of element ``i`` of both inputs in ``buffer_result``."""
    # The previous kernel was a lambda that only *returned* the product, so
    # nothing was ever written to buffer_result and the copy-back read a buffer
    # that had never been filled in.
    # NOTE(review): assumes the buffers support item assignment the same way
    # they support item reads -- confirm.
    buffer_result[i] = buffer_a[i] * buffer_b[i]


# Track every successful allocation so the ``finally`` clause can release it
# even when a later allocation, copy, or the kernel launch raises.
buffers = []
try:
    # Three buffers of n float32 elements each: input a, input b, and the output.
    for _ in range(3):
        buffers.append(dp.device_allocator(np.float32).allocate(n))
    buffer_a, buffer_b, buffer_result = buffers

    # Upload the host inputs into their buffers.
    dp.device_memcpy(buffer_a, a)
    dp.device_memcpy(buffer_b, b)

    # Element-wise multiply over indices 0..n-1; the kernel writes each product
    # into buffer_result.
    dp.parallel_for(n, dp.DEVICE_DEFAULT, _multiply_at)

    # Download the products into a freshly allocated host array.
    result = np.empty(n, dtype=np.float32)
    dp.device_memcpy(result, buffer_result)
finally:
    # Release in allocation order (a, b, result), as the original script did.
    for buf in buffers:
        buf.deallocate()

# Accumulate in float64: the exact total for n = 1000 is 333,333,000, which is
# above 2**24 and therefore not exactly representable as a float32.
total_result = np.sum(result, dtype=np.float64)

print("The sum of the array multiplication results is:", total_result)