-
Notifications
You must be signed in to change notification settings - Fork 0
/
Unique_index_calc_1D_gid.cu
50 lines (38 loc) · 1.39 KB
/
Unique_index_calc_1D_gid.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
//S1L10: Unique index calculation using 2D grid
__global__ void unique_idx_calc_threadIdx(int * input) //we will transfer memory from host to this pointer in the device
{
int tid = threadIdx.x; //unique id for each thread
int offset = blockIdx.x * blockDim.x;
int gid = tid + offset; //gid = global id / unique id to access each element of an array
printf("threadIdx : %d, blockIdx.x : %d, blockDim.x : %d, gid : %d, array_val : %d \n", tid, blockIdx.x, blockDim.x, gid, input[gid]);
//blocks will accesses randomly, ex. num 2 block will access then 3 , then 0 ...
}
int main()
{
int array_size = 8;
int array_byte_size = sizeof(int) * array_size;
int h_data[] = {23,9,4,53,65,12,1,33};
for (int i = 0; i < array_size; i++)
{
printf("%d ", h_data[i]);
}
printf("\n \n");
int* d_data;
cudaMalloc((void**)&d_data, array_byte_size);
cudaMemcpy(d_data, h_data, array_byte_size, cudaMemcpyHostToDevice);
dim3 block(4);//per block 4 threads
dim3 grid(2); //there is 2 blocks
//now in this case, first four vals will access twice by the two block of 4 threads
//challenge : how can we access other four elements??
//use gid
// gid = tid + Offset
// offset = blockidx.x * blockdim.x
unique_idx_calc_threadIdx << <grid, block >> > (d_data);
cudaDeviceSynchronize();
cudaDeviceReset();
return 0;
}