## Device Routines

### `omp_is_initial_device` Routine

The following example shows how the `omp_is_initial_device` runtime library routine can be used to query whether code is executing on the initial (host) device or on a target device. The example then sets the number of threads in the `parallel` region based on where the code is executing.

In [None]:
//%compiler: clang
//%cflags: -fopenmp

/*
* name: device.1c
* type: C
* version: omp_4.0
*/
#include <stdio.h>
#include <omp.h>

/* Everything between declare target / end declare target is also made
 * available on target devices, so vec_mult can be called from inside a
 * target region. */
#pragma omp declare target
void vec_mult(float *p, float *v1, float *v2, int N);
extern float *p, *v1, *v2;   /* result vector and the two operand vectors; defined elsewhere */
extern int N;                /* element count used for the map sections in foo; defined elsewhere */
#pragma omp end declare target

/* Helpers defined outside this file; foo calls them before and after the
 * target region. */
extern void init_vars(float *, float *, int);
extern void output(float *, int);

/*
 * Host driver: fill the input vectors with init_vars, run vec_mult inside a
 * target region on device number 42, then hand the result to output.
 */
void foo()
{
   init_vars(v1, v2, N);

   /* The sections p[:N], v1[:N] and v2[:N] are mapped for the region; no
    * map-type is given, so the default (tofrom) applies. */
   #pragma omp target device(42) \
                      map(p[:N], v1[:N], v2[:N])
   vec_mult(p, v1, v2, N);

   output(p, N);
}

void vec_mult(float *p, float *v1, float *v2, int N)
{
   int i;
   int nthreads;
   if (!omp_is_initial_device())
   {
      printf("1024 threads on target device\n");
      nthreads = 1024;
   }
   else
   {
      printf("8 threads on initial device\n");
      nthreads = 8;
   }
   #pragma omp parallel for private(i) num_threads(nthreads)
   for (i=0; i<N; i++)
     p[i] = v1[i] * v2[i];
}



In [None]:

! name: device.1f
! type: F-free
! version:    omp_4.0
! Problem-size constant shared by the example.
module params
   implicit none
   integer, parameter :: N = 1024   ! length of each vector declared in the main program
end module params
! Vector-multiply kernel that may execute on the host or on a target device.
module vmult
   implicit none
contains
   ! Element-wise product: p(i) = v1(i) * v2(i) for i = 1..N.
   !
   ! The routine is marked declare target so it can be called from inside a
   ! target region.  It requests 1024 threads for the parallel loop when it
   ! is running on a target device and 8 when it is running on the initial
   ! (host) device, and prints which case was taken.
   !
   !   p      - result vector; every element is overwritten
   !   v1, v2 - operand vectors; read only
   !   N      - number of elements in each vector
   subroutine vec_mult(p, v1, v2, N)
      use omp_lib, only : omp_is_initial_device
      !$omp declare target
      ! N is declared before the arrays whose bounds depend on it, which is
      ! required once implicit typing is switched off.
      integer, intent(in)  :: N
      real,    intent(in)  :: v1(N), v2(N)
      real,    intent(out) :: p(N)
      integer :: i, nthreads

      if (.not. omp_is_initial_device()) then
         print*, "1024 threads on target device"
         nthreads = 1024
      else
         print*, "8 threads on initial device"
         nthreads = 8
      endif

      !$omp parallel do private(i) num_threads(nthreads)
      do i = 1,N
         p(i) = v1(i) * v2(i)
      end do
   end subroutine vec_mult
end module vmult
! Driver: initialise v1 and v2, compute their element-wise product inside a
! target region on device number 42, then pass the result to output.
program prog_vec_mult
   use params, only : N
   use vmult,  only : vec_mult
   implicit none
   real :: p(N), v1(N), v2(N)

   call init(v1, v2, N)

   ! p, v1 and v2 are mapped as whole arrays; no map-type is given, so the
   ! default applies.
   !$omp target device(42) map(p, v1, v2)
   call vec_mult(p, v1, v2, N)
   !$omp end target

   call output(p, N)
end program prog_vec_mult



### `omp_get_num_devices` Routine

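The following example shows how the `omp_get_num_devices` runtime library routine can be used to determine the number of target devices. The example offloads the loop only when at least one device is available and the problem size is large enough.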

In [None]:
//%compiler: clang
//%cflags: -fopenmp

/*
* name: device.2c
* type: C
* version: omp_4.0
*/
#include <omp.h>
/* Helpers defined outside this file. */
extern void init(float *, float *, int);
extern void output(float *, int);

/*
 * Initialise v1 and v2 with init, compute p[k] = v1[k] * v2[k] for
 * 0 <= k < N, and pass p to output.
 *
 * The loop is offloaded only when omp_get_num_devices() reports at least
 * one device and N exceeds 1000000; otherwise the target region runs on the
 * host.  The loop is run in parallel only when N exceeds 1000.
 */
void vec_mult(float *p, float *v1, float *v2, int N)
{
   int k;
   int device_count;
   int offload = 0;

   init(v1, v2, N);

   device_count = omp_get_num_devices();
   if (device_count > 0 && N > 1000000)
      offload = 1;

   #pragma omp target if(offload) map(to: v1[0:N], v2[:N]) map(from: p[0:N])
   #pragma omp parallel for if(N>1000) private(k)
   for (k = 0; k < N; ++k)
      p[k] = v1[k] * v2[k];

   output(p, N);
}



In [None]:

! name: device.2f
! type: F-free
! version:    omp_4.0
! Initialise v1 and v2 with init, form p(k) = v1(k) * v2(k) for k = 1..N,
! and pass p to output.
!
! The loop is offloaded only when omp_get_num_devices() reports at least one
! device and N exceeds 1000000; it is run in parallel only when N exceeds 1000.
subroutine vec_mult(p, v1, v2, N)
   use omp_lib, only : omp_get_num_devices
   implicit none
   integer :: N
   real    :: p(N), v1(N), v2(N)
   integer :: k, device_count
   logical :: offload

   call init(v1, v2, N)

   device_count = omp_get_num_devices()
   offload = .false.
   if (device_count > 0) offload = (N > 1000000)

   !$omp target if(offload) map(to: v1, v2) map(from: p)
   !$omp parallel do if(N>1000)
   do k = 1, N
      p(k) = v1(k) * v2(k)
   end do
   !$omp end parallel do
   !$omp end target

   call output(p, N)
end subroutine vec_mult



### `omp_set_default_device` and `omp_get_default_device` Routines

The following example shows how the `omp_set_default_device` and `omp_get_default_device` runtime library routines can be used to set the default device and determine the default device, respectively.

In [None]:
//%compiler: clang
//%cflags: -fopenmp

/*
* name: device.3c
* type: C
* version: omp_4.0
*/
#include <omp.h>
#include <stdio.h>
/*
 * Print the current default device, request the next device number as the
 * new default, and print a second message if the runtime does not report
 * the requested number afterwards.
 */
void foo(void)
{
   const int initial   = omp_get_default_device();
   const int requested = initial + 1;

   printf("Default device = %d\n", initial);

   omp_set_default_device(requested);
   if (omp_get_default_device() == requested)
      return;

   printf("Default device is still = %d\n", initial);
}



In [None]:

! name: device.3f
! type: F-free
! version:    omp_4.0
! Print the current default device, request the next device number as the
! new default, and print a second message if the default device is unchanged.
program foo
   use omp_lib, only : omp_get_default_device, omp_set_default_device
   implicit none
   integer :: previous, requested

   previous = omp_get_default_device()
   print*, "Default device = ", previous

   requested = previous + 1
   call omp_set_default_device(requested)

   if (omp_get_default_device() == previous) then
      print*,"Default device is STILL = ", previous
   end if
end program foo



### Target Memory and Device Pointers Routines

The following example shows how to create space on a device, transfer data to and from that space, and free the space, using API calls. The API calls directly execute allocation, copy and free operations on the device, without invoking any mapping through a `target` directive. The `omp_target_alloc` routine allocates space and returns a device pointer for referencing the space in the `omp_target_memcpy` API routine on the host. The `omp_target_free` routine frees the space on the device.

The example also illustrates how to access that space in a `target` region by exposing the device pointer in an `is_device_ptr` clause.

The example creates an array of cosine values on the default device, to be used on the host device. The function fails if a default device is not available.

In [None]:
//%compiler: clang
//%cflags: -fopenmp

/*
* name: device.6c
* type: C
* version: omp_4.5
*/
#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <omp.h>

/*
 * Fill mem[0..s-1] with cos(i), computed on the default device.
 *
 * Device storage is obtained with omp_target_alloc, the host buffer is
 * copied into it with omp_target_memcpy, a target region overwrites every
 * element through the device pointer (is_device_ptr), and the result is
 * copied back into mem before the device storage is released with
 * omp_target_free.
 *
 *   mem - host buffer with room for s doubles
 *   s   - number of elements; nothing is allocated or copied when s is 0
 *
 * Prints a message and calls exit(1) when no device is available, when the
 * device allocation fails, or when either copy fails.
 */
void get_dev_cos(double *mem, size_t s)
{
   int h, t;
   size_t i;   /* same type as s, so the loop test is not a mixed-sign comparison */
   double * mem_dev_cpy;
   h = omp_get_initial_device();
   t = omp_get_default_device();

   if (omp_get_num_devices() < 1 || t < 0){
      printf(" ERROR: No device found.\n");
      exit(1);
   }

   /* A zero-byte allocation may legitimately return NULL; do not report
    * that as an out-of-memory condition. */
   if (s == 0){
      return;
   }

   mem_dev_cpy = (double *)omp_target_alloc( sizeof(double) * s, t);
   if(mem_dev_cpy == NULL){
      printf(" ERROR: No space left on device.\n");
      exit(1);
   }

   /* host -> device (dst, src, length, dst offset, src offset, dst device,
    * src device); omp_target_memcpy returns zero on success */
   if (omp_target_memcpy(mem_dev_cpy, mem, sizeof(double)*s,
                         0, 0,
                         t, h) != 0){
      printf(" ERROR: Copy to device failed.\n");
      omp_target_free(mem_dev_cpy, t);
      exit(1);
   }

   /* mem_dev_cpy already holds a device address, so it is passed through
    * is_device_ptr instead of being mapped. */
   #pragma omp target is_device_ptr(mem_dev_cpy) device(t)
   #pragma omp teams distribute parallel for
   for(i=0;i<s;i++){ mem_dev_cpy[i] = cos((double)i); } /* init data */

   /* device -> host */
   if (omp_target_memcpy(mem, mem_dev_cpy, sizeof(double)*s,
                         0, 0,
                         h, t) != 0){
      printf(" ERROR: Copy from device failed.\n");
      omp_target_free(mem_dev_cpy, t);
      exit(1);
   }

   omp_target_free(mem_dev_cpy, t);
}

