Some C++ code has trouble with the following pattern, in which a one-past-the-end pointer (`last`) is referenced inside an OpenACC compute region. The documentation should include a section describing the `copy(last[-1:0])` solution.
#include <cstdio>
#include <cstdlib>
/*
 * Reproducer for an OpenACC pattern involving a one-past-the-end pointer.
 *
 * Allocates COUNT floats, fills element i with (float)i inside an OpenACC
 * parallel loop, then prints the first and last elements.
 *
 * `last` points one element past the end of the allocation. It is only
 * compared against `first` inside the compute region, never dereferenced.
 *
 * Returns 0 on success, EXIT_FAILURE if the buffer cannot be allocated.
 */
int main(int argc, char **argv)
{
    /* Element count shared by the allocation, the data clause and the loop. */
    enum { COUNT = 1024 };

    /* The command line is not used. */
    (void)argc;
    (void)argv;

    /* The cast is kept because this file is built as C++. */
    float *first = (float*)malloc(COUNT * sizeof *first);
    if (first == NULL)
    {
        fprintf(stderr, "failed to allocate %d floats\n", (int)COUNT);
        return EXIT_FAILURE;
    }

    /* One past the final element: valid to form and compare, not to dereference. */
    float *last = first + COUNT;

    /*
     * `last[-1:0]` is a zero-length section that starts at `last - 1`, i.e. at
     * the final element of the `first` buffer named in the preceding clause.
     * NOTE(review): presumably this is what lets the runtime give `last` a
     * device address consistent with `first` -- confirm against the OpenACC
     * specification and the compiler in use.
     */
#pragma acc parallel loop copy(first[0:COUNT]) copy(last[-1:0])
    for (int i = 0; i < COUNT; i++)
    {
        /* On the host this comparison always holds, since COUNT > 0. */
        if (first != last)
        {
            first[i] = (float)i;
        }
    }

    printf("[%d] %f : [%d] %f\n", 0, first[0], COUNT - 1, first[COUNT - 1]);

    free(first);
    return 0;
}