## Exercise - CCCL - Customizing Algorithms - SOLUTION

In [None]:
import numpy as np
import cupy as cp
import cuda.compute as comp

### Exercise: computing the minimum value

In [None]:
"""
Using `reduce_into()` to compute the minimum value of a sequence
"""

# Eight int32 samples on the device, plus a one-element device buffer that
# receives the reduced value.
d_input = cp.array([-2, 3, 5, 1, 7, -6, 8, -4], dtype=np.int32)
d_output = cp.empty(1, dtype=np.int32)


# begin TODO
# Seed the reduction with the largest representable int32: taking the minimum
# with it leaves every int32 element unchanged.
MAX_INT = np.iinfo(np.int32).max
h_init = np.asarray([MAX_INT], dtype=np.int32)
comp.reduce_into(
    d_input,
    d_output,
    comp.OpKind.MINIMUM,
    d_input.size,
    h_init,
)
# end TODO

# -6 is the smallest entry of the input sequence above.
expected_output = -6
assert cp.all(d_output == expected_output)
result = d_output[0]
print(f"Min reduction result: {result}")

### Exercise: sort by the last digit

In [None]:
# Keys to sort. The merge sort call below passes this same device array as
# both its input and its output, so the keys end up sorted in place.
d_in_keys = cp.asarray(
    [29, 9, 136, 1001, 72, 24, 32, 1],
    dtype="int32",
)

# Custom comparator handed to the merge sort.
def comparison_op(lhs, rhs):
    """Return True when ``lhs`` should be ordered before ``rhs``.

    Keys are compared by their remainder modulo 10, which is the last decimal
    digit for non-negative keys; the rest of the value is ignored.
    """
    lhs_digit = lhs % 10
    rhs_digit = rhs % 10
    return lhs_digit < rhs_digit

# Sort the keys with the custom comparator. Both "items" arguments are None,
# and d_in_keys is passed as input and output alike.
comp.merge_sort(d_in_keys, None, d_in_keys, None, comparison_op, d_in_keys.size)

print(f"Result: {d_in_keys}")

# Keys ordered by last digit; keys sharing a last digit appear in the same
# relative order as in the input (e.g. 1001 before 1, 29 before 9).
expected = np.asarray([1001, 1, 72, 32, 24, 136, 29, 9], dtype=np.int32)
assert np.array_equal(d_in_keys.get(), expected)

### Exercise 3: implementing a running average

In [None]:
# Accumulator carried through the scan: a partial sum together with the number
# of elements that contributed to it. Instances are built positionally as
# SumAndCount(sum, count), so the field order below matters.
@comp.gpu_struct
class SumAndCount:
    sum: np.float32  # partial sum of the input values
    count: np.int32  # number of values folded into `sum`

def reduce_op(x, y) -> SumAndCount:
    """Combine two partial accumulators field by field.

    The sums are added together and the counts are added together, giving the
    accumulator for the union of the elements covered by ``x`` and ``y``.
    """
    combined_sum = x.sum + y.sum
    combined_count = x.count + y.count
    return SumAndCount(combined_sum, combined_count)

def compute_running_average(x: SumAndCount) -> np.float32:
    """Turn an accumulator into an average: its sum divided by its count."""
    total = x.sum
    n_values = x.count
    return total / n_values

# Host-side initial value handed to the scan: zero sum, zero count.
h_init = SumAndCount(0, 0)

# Device input values and a device output buffer of the same shape and dtype.
d_input = cp.array([2, 3, 5, 1, 7, 6, 8, 4], dtype=np.float32)
d_output = cp.empty_like(d_input)

# begin TODO
# Pair every input value with a constant int32 count of 1, so that each
# element enters the scan as a (value, 1) pair.
it_input = comp.ZipIterator(d_input, comp.ConstantIterator(np.int32(1)))
# Output goes through compute_running_average on its way into d_output
# (presumably applied to each scanned accumulator as it is written).
it_output = comp.TransformOutputIterator(d_output, compute_running_average)
# end TODO

# Run the inclusive scan with reduce_op as the combining operator.
comp.inclusive_scan(it_input, it_output, reduce_op, h_init, d_input.size)

print(f"Input sequence: {d_input}")

# Reference running average computed on the host with NumPy: cumulative sum
# divided by the 1-based position.
h_input = d_input.get()
expected = np.cumsum(h_input) / np.arange(1, h_input.size + 1)

print(f"Running average result: {d_output}")
np.testing.assert_allclose(d_output.get(), expected)