In [None]:
%%writefile constant.cu

#include <stdio.h>
#include<cuda_runtime.h>

// Lookup table of 256 floats placed in device constant memory (__constant__).
// The host fills it with cudaMemcpyToSymbol in main(); the kernel below only
// reads from it.
__constant__ float lookuptable[256];

// Kernel function that uses the constant lookuptable
// Writes output[i] = 2 * lookuptable[i % 256] for every i in [0, n).
//
// Each thread handles the single element whose index is its global x-index
// (blockIdx.x * blockDim.x + threadIdx.x); only the x dimension of the launch
// is used. Threads whose index falls at or beyond n do nothing, so the grid may
// be rounded up past n.
//
//   output : device buffer receiving the results (must hold at least n floats)
//   n      : number of elements to write
__global__ void computeWithLookupTable(float *output,int n){

  const int gid = blockIdx.x * blockDim.x + threadIdx.x;

  // Surplus threads in the last block have no element to produce.
  if(gid >= n){
    return;
  }

  // Indices wrap around the 256-entry constant table.
  const float tableValue = lookuptable[gid % 256];
  output[gid] = 2.0f * tableValue;
}


// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaSucceeded(cudaError_t status,const char *what){
    if(status == cudaSuccess){
        return true;
    }
    fprintf(stderr,"CUDA error during %s: %s\n",what,cudaGetErrorString(status));
    return false;
}

// Demo driver: fills the constant lookup table, runs the kernel over a
// 1024-element buffer and prints the first results.
//
// Returns 0 on success and 1 if any CUDA call or the kernel launch fails.
// Results are printed only when every step succeeded, so a failure never
// prints uninitialised host memory.
int main(){

    // Build the host-side table: entry i holds i * 0.5.
    const int tableSize = 256;
    float h_lookuptable[tableSize];

    for(int i=0;i<tableSize;i++){
      h_lookuptable[i] = static_cast<float>(i)*0.5f;
    }

    // Copy the lookup table from host to constant memory on the device.
    if(!cudaSucceeded(cudaMemcpyToSymbol(lookuptable,h_lookuptable,sizeof(h_lookuptable)),
                      "cudaMemcpyToSymbol(lookuptable)")){
      return 1;
    }

    // Define and allocate the output array.
    const int arraySize = 1024;
    const size_t outputBytes = sizeof(float)*arraySize;
    float *d_output = nullptr;
    float h_output[arraySize];

    if(!cudaSucceeded(cudaMalloc(&d_output,outputBytes),"cudaMalloc(d_output)")){
      return 1;
    }

    // Launch the kernel with enough blocks to cover arraySize (ceil division).
    const int threadsPerBlock = 256;
    const int blocksPerGrid = (arraySize + threadsPerBlock - 1)/threadsPerBlock;
    computeWithLookupTable<<<blocksPerGrid,threadsPerBlock>>>(d_output,arraySize);

    // A launch does not return a status itself; bad launch configurations are
    // reported through cudaGetLastError().
    bool ok = cudaSucceeded(cudaGetLastError(),"computeWithLookupTable launch");

    // Copy the result back to the host. This blocking copy also surfaces any
    // error raised while the kernel was executing.
    ok = ok && cudaSucceeded(cudaMemcpy(h_output,d_output,outputBytes,cudaMemcpyDeviceToHost),
                             "cudaMemcpy(device to host)");

    if(ok){
      // Print part of the output for verification. 258 goes just past the
      // 256-entry table so the wrap-around back to entry 0 is visible.
      const int printCount = 258;
      for(int i=0;i<printCount && i<arraySize;i++){
        printf("Output[%d] = %f\n",i, h_output[i]);
      }
    }

    // Free device memory on both the success and the failure path.
    if(!cudaSucceeded(cudaFree(d_output),"cudaFree(d_output)")){
      ok = false;
    }

    return ok ? 0 : 1;
}






Overwriting constant.cu


In [None]:
!nvcc -o exe constant.cu
!./exe

Output[0] = 0.000000
Output[1] = 1.000000
Output[2] = 2.000000
Output[3] = 3.000000
Output[4] = 4.000000
Output[5] = 5.000000
Output[6] = 6.000000
Output[7] = 7.000000
Output[8] = 8.000000
Output[9] = 9.000000
Output[10] = 10.000000
Output[11] = 11.000000
Output[12] = 12.000000
Output[13] = 13.000000
Output[14] = 14.000000
Output[15] = 15.000000
Output[16] = 16.000000
Output[17] = 17.000000
Output[18] = 18.000000
Output[19] = 19.000000
Output[20] = 20.000000
Output[21] = 21.000000
Output[22] = 22.000000
Output[23] = 23.000000
Output[24] = 24.000000
Output[25] = 25.000000
Output[26] = 26.000000
Output[27] = 27.000000
Output[28] = 28.000000
Output[29] = 29.000000
Output[30] = 30.000000
Output[31] = 31.000000
Output[32] = 32.000000
Output[33] = 33.000000
Output[34] = 34.000000
Output[35] = 35.000000
Output[36] = 36.000000
Output[37] = 37.000000
Output[38] = 38.000000
Output[39] = 39.000000
Output[40] = 40.000000
Output[41] = 41.000000
Output[42] = 42.000000
Output[43] = 43.000000
Output[4