# Tests for Dense Layer

To make sure that the outputs from the GPU are correct, you should test them against what you get from the original CPU implementation. The code below computes the reference results with the original implementation on the CPU, followed by the corresponding GPU results.

You should do the same for any other layer you implement.

In [1]:
import numpy as np

import pyopencl as cl
import pyopencl.array as cl_array
import PyNetwork
from PyNetwork.layers.Dense_GPU import Dense_GPU

In [2]:
# Layer geometry reused by both the CPU and the GPU layer in this notebook:
# a 1-D input of width 10 feeding 100 output nodes.
input_shape, output_nodes = (10,), 100

# CPU Dense layer with a ReLU activation; build() receives the input shape.
layer = PyNetwork.layers.Dense(output_nodes, 'relu')
layer.build(input_shape)

In [3]:
# OpenCL setup: take every device of the first reported platform and create
# one context plus one command queue that the rest of the notebook shares.
platform = cl.get_platforms()
devices = platform[0].get_devices()
context = cl.Context(devices)
queue = cl.CommandQueue(context)

# GPU counterpart of the CPU layer: same node count, same activation,
# built with the same input shape.
layer_gpu = Dense_GPU(
    context=context,
    queue=queue,
    hidden_nodes=output_nodes,
    activation_function='relu',
)
layer_gpu.build(input_shape)

# Test Predict

In [4]:
# Number of samples in the random test batch.
n_datapoints = 20

# float32 uniform samples, shifted down by 0.5 so the batch holds both
# negative and positive values (roughly the range [-0.5, 0.5)).
z_in = np.random.rand(n_datapoints, *input_shape).astype(np.float32)
z_in -= 0.5

# CPU result; with output_only=False predict() hands back two values,
# bound here as (a_true, z_out_true).
a_true, z_out_true = layer.predict(z_in, output_only=False)

In [5]:
# Copy the same host batch to the device, then run the GPU layer's predict()
# with the same output_only=False flag used for the CPU layer.
z_in_gpu = cl_array.to_device(queue, z_in)
a_true_gpu, z_out_true_gpu = layer_gpu.predict(
    z_in_gpu,
    output_only=False,
)

# Test Backprop

In [6]:
# Random float32 stand-ins for the backprop inputs, drawn in this order:
#   g_prime   -> (n_datapoints, *input_shape)
#   new_delta -> (n_datapoints, output_nodes)
g_prime, new_delta = (
    np.random.rand(n_datapoints, *dims).astype(np.float32)
    for dims in (input_shape, (output_nodes,))
)

# CPU result of the layer's delta back-propagation.
delta_true = layer.get_delta_backprop_(g_prime, new_delta)

In [7]:
# Upload both host arrays (g_prime first, then new_delta) to the device.
g_prime_gpu, new_delta_gpu = (
    cl_array.to_device(queue, host_array)
    for host_array in (g_prime, new_delta)
)

# GPU result of the same back-propagation call made on the CPU layer.
delta_true_gpu = layer_gpu.get_delta_backprop_(g_prime_gpu, new_delta_gpu)

# Test Weight Gradients

In [8]:
# Random float32 inputs for the weight-gradient call, drawn in this order:
#   delta  -> (n_datapoints, *input_shape)
#   prev_z -> (n_datapoints, output_nodes)
# NOTE(review): `delta` is given the input width and `prev_z` the output
# width, the reverse of how `new_delta` is shaped in the backprop cell.
# Confirm these are the shapes get_weight_grad_ actually expects.
delta, prev_z = (
    np.random.rand(n_datapoints, *dims).astype(np.float32)
    for dims in (input_shape, (output_nodes,))
)

# CPU result; the call returns two values, bound as
# (delta_grad_true, w_grad_true).
delta_grad_true, w_grad_true = layer.get_weight_grad_(delta, prev_z)

In [9]:
# Upload both host arrays (delta first, then prev_z) to the device.
delta_gpu, prev_z_gpu = (
    cl_array.to_device(queue, host_array)
    for host_array in (delta, prev_z)
)

# GPU result of the same weight-gradient call made on the CPU layer.
delta_grad_true_gpu, w_grad_true_gpu = layer_gpu.get_weight_grad_(
    delta_gpu,
    prev_z_gpu,
)

In [10]:
# Two small float32 host arrays for ad-hoc experiments ...
a = np.array([1, 2, 5], dtype=np.float32)
b = np.array([10], dtype=np.float32)

# ... and their device copies.
a_gpu = cl_array.to_device(queue, a)
b_gpu = cl_array.to_device(queue, b)


In [11]:
# Maximum of a_gpu computed through pyopencl's array module; the result is
# bound to x and not displayed. The compiler-output warning recorded under
# this cell was emitted while running this call.
x = cl_array.max(a_gpu)

  warn("Non-empty compiler output encountered. Set the "


In [12]:
# 3x3 integer matrix on the host (this rebinds x, replacing the earlier
# cl_array.max result) ...
x = np.array(
    [
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, 9],
    ]
)
# ... and its device copy.
x_gpu = cl_array.to_device(queue, x)

In [13]:
# Host-side maximum along the last axis, i.e. one value per row of x.
np.max(x, axis=-1)

array([3, 6, 9])

In [14]:
# One-element float32 array holding a small epsilon (1e-10); the bare name
# on the last line makes the notebook display it.
y = np.array([1e-10], dtype=np.float32)
y

array([1.e-10], dtype=float32)

In [15]:
# Device copy of the float32 epsilon array y; the bare name displays it.
ep = cl_array.to_device(queue, y)
ep

cl.Array([1.e-10], dtype=float32)

In [16]:
from PyNetwork.gpu.GPUNN import GPUOPERATOR

In [17]:
# Instantiate the project's GPUOPERATOR helper on the notebook's shared
# OpenCL context and command queue.
op = GPUOPERATOR(context=context, queue=queue)

In [18]:
# Shape check for op.repeat: repeating the 1-element ep 10 times gave a
# 2-D (1, 10) array in the recorded run, not a flat length-10 vector.
op.repeat(ep, 10).shape

(1, 10)

In [19]:
# Six-element float32 host vector used for the element-wise experiments below.
std = np.array([1, 2, 3, 6, 4, 5], dtype=np.float32)

In [20]:
# Device copy of the host vector std.
std_gpu = cl_array.to_device(queue, std)

In [21]:
# Display std_gpu reshaped to a single row (1, 6); the result is not
# assigned, so std_gpu keeps referring to the original 1-D array.
std_gpu.reshape(1, -1)

cl.Array([[1., 2., 3., 6., 4., 5.]], dtype=float32)

In [22]:
# Add the epsilon, repeated 6 times, to the row-shaped std_gpu on the device.
# This rebinds `a`, which earlier held a host array.
# In the recorded run the displayed values are unchanged after the addition.
a = std_gpu.reshape(1, -1) + op.repeat(ep, 6)

In [23]:
# Display `a` with its length-1 axis dropped, giving a flat 6-element array.
a.squeeze()

cl.Array([1., 2., 3., 6., 4., 5.], dtype=float32)

In [29]:
# Element-wise reciprocal of the device array (Python scalar divided by a
# pyopencl array); the result is displayed, not stored.
1 / std_gpu 

cl.Array([1.        , 0.5       , 0.33333334, 0.16666667, 0.25      ,
       0.2       ], dtype=float32)

In [30]:
# len() works on the device array; it reported 6 for this 6-element vector.
len(std_gpu)

6

In [24]:
# Display the device-side epsilon array again.
ep

cl.Array([1.e-10], dtype=float32)

In [25]:
# Try the project's broadcast_scalar helper with the epsilon and std_gpu.
# NOTE(review): in the recorded output only the first slot holds 1e-10 and
# the other five are 0. If the helper is meant to fill every slot with the
# scalar, this looks like a bug in broadcast_scalar. Confirm the intended
# behaviour.
op.broadcast_scalar(ep, std_gpu)

cl.Array([1.e-10, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00], dtype=float32)