-
Notifications
You must be signed in to change notification settings - Fork 0
/
CUDA.cu
executable file
·69 lines (52 loc) · 1.58 KB
/
CUDA.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#include <stdio.h>
#include <stdlib.h>
/*
 * Advances the heat grid by one time step: reads ad, writes bd.
 *
 * Layout: both buffers hold nx*ny floats, cell (x, y) at index x*ny + y,
 * with x in [0, nx) and y in [0, ny).
 *
 * Interior cells receive the five-point update
 *     new = c + (ad[x+1][y] + ad[x-1][y] - 2c)/10
 *             + (ad[x][y+1] + ad[x][y-1] - 2c)/10
 * Edge cells (x == 0, y == 0, x == nx-1, y == ny-1) are copied unchanged,
 * so bd is a complete grid after every step. Without that copy, a caller
 * that ping-pongs ad/bd would read edge values that were never written.
 *
 * Launch: 1D grid, 1D block. blockIdx.x strides over x and threadIdx.x
 * strides over y, so <<<nx, ny>>> maps exactly one thread to each cell,
 * while smaller grids/blocks still cover every cell.
 *
 * ad and bd must be distinct buffers; the update is not safe in place.
 */
__global__
void update(float *ad, float *bd, int ny,int nx)
{
    for (int x = blockIdx.x; x < nx; x += gridDim.x) {
        /* size_t keeps x*ny from overflowing int on very large grids. */
        const size_t row = (size_t)x * (size_t)ny;
        for (int y = threadIdx.x; y < ny; y += blockDim.x) {
            const size_t c = row + (size_t)y;
            if (x > 0 && y > 0 && x < nx-1 && y < ny-1) {
                /* Same expression shape as the reference formula so the
                   floating-point result is unchanged. */
                bd[c] = ad[c] + (ad[c + ny] + ad[c - ny] - 2 * ad[c])/10 + (ad[c + 1] + ad[c - 1] - 2 * ad[c])/10;
            } else {
                /* Fixed edge value: carry it over to the output buffer. */
                bd[c] = ad[c];
            }
        }
    }
}
/* Prints a diagnostic for a failed CUDA runtime call.
   Returns 1 when status is an error, 0 when it is cudaSuccess. */
static int updateGPUFailed(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return 0;
    fprintf(stderr, "updateGPU: %s failed: %s\n", what, cudaGetErrorString(status));
    return 1;
}

/*
 * Runs `steps` iterations of the `update` kernel on the GPU and writes the
 * final grid back into arr1.
 *
 * arr1  : nx row pointers, each to ny floats; read as the initial grid and
 *         overwritten with the result.
 * arr2  : unused; kept so the exported signature stays the same.
 * nx,ny : grid extents. ny is used as the thread-block size, so it must not
 *         exceed the device's maximum threads per block.
 * steps : number of time steps; values <= 0 perform no kernel launches.
 *
 * Prints the banner and the elapsed kernel time (measured with CUDA events
 * around the launch loop) on stdout.
 *
 * Returns 1 on success. On any failure a message goes to stderr, arr1 is
 * left untouched, and 0 is returned.
 */
extern "C" float updateGPU(float **arr1, float **arr2, int nx, int ny, int steps)
{
    /* Everything is declared and initialised up front so the `goto cleanup`
       paths below never jump over an initialisation. */
    float *ad = NULL, *bd = NULL;           /* device ping-pong buffers */
    float *s = NULL;                        /* host staging buffer      */
    float *src = NULL, *dst = NULL, *tmp = NULL;
    float milli = 0.0f, result = 0.0f;
    cudaEvent_t start = NULL, stop = NULL;
    size_t size = 0;
    int i, j, device = 0, maxThreads = 0;

    (void)arr2;

    printf("= mpi_heat2D - CUDA Version =\nGrid size: X = %d, Y = %d, Time steps = %d\n",nx,ny,steps);

    if (arr1 == NULL || nx <= 0 || ny <= 0) {
        fprintf(stderr, "updateGPU: invalid grid (arr1=%p, nx=%d, ny=%d)\n", (void *)arr1, nx, ny);
        return result;
    }

    /* Widen before multiplying: nx*ny can overflow int. */
    size = (size_t)nx * (size_t)ny * sizeof(float);

    /* One block per x, one thread per y: reject grids the device cannot
       launch instead of failing silently inside the loop. */
    if (updateGPUFailed(cudaGetDevice(&device), "cudaGetDevice") ||
        updateGPUFailed(cudaDeviceGetAttribute(&maxThreads, cudaDevAttrMaxThreadsPerBlock, device),
                        "cudaDeviceGetAttribute"))
        return result;
    if (ny > maxThreads) {
        fprintf(stderr, "updateGPU: ny = %d exceeds the device limit of %d threads per block\n",
                ny, maxThreads);
        return result;
    }

    /* Heap, not a stack VLA: large grids would overflow the stack. */
    s = (float *)malloc(size);
    if (s == NULL) {
        fprintf(stderr, "updateGPU: cannot allocate %zu bytes of host memory\n", size);
        goto cleanup;
    }

    /* Flatten the row-pointer grid into the contiguous x*ny + y layout. */
    for (i = 0; i < nx; i++)
        for (j = 0; j < ny; j++)
            s[(size_t)i*ny + j] = arr1[i][j];

    if (updateGPUFailed(cudaEventCreate(&start), "cudaEventCreate(start)") ||
        updateGPUFailed(cudaEventCreate(&stop), "cudaEventCreate(stop)") ||
        updateGPUFailed(cudaMalloc((void **)&ad, size), "cudaMalloc(ad)") ||
        updateGPUFailed(cudaMalloc((void **)&bd, size), "cudaMalloc(bd)") ||
        updateGPUFailed(cudaMemcpy(ad, s, size, cudaMemcpyHostToDevice), "cudaMemcpy(host to device)") ||
        /* Seed the second buffer too, so its edge cells hold the boundary
           values even if the kernel only writes interior cells. */
        updateGPUFailed(cudaMemcpy(bd, ad, size, cudaMemcpyDeviceToDevice), "cudaMemcpy(device to device)"))
        goto cleanup;

    if (updateGPUFailed(cudaEventRecord(start), "cudaEventRecord(start)"))
        goto cleanup;

    /* Ping-pong: after each step the freshly written buffer becomes the
       source, so `src` always names the most recent grid. */
    src = ad;
    dst = bd;
    for (i = 0; i < steps; i++) {
        update<<<nx, ny>>>(src, dst, ny, nx);
        if (updateGPUFailed(cudaGetLastError(), "update kernel launch"))
            goto cleanup;
        tmp = src;
        src = dst;
        dst = tmp;
    }

    if (updateGPUFailed(cudaEventRecord(stop), "cudaEventRecord(stop)") ||
        /* Blocking copy: also surfaces any asynchronous kernel fault. */
        updateGPUFailed(cudaMemcpy(s, src, size, cudaMemcpyDeviceToHost), "cudaMemcpy(device to host)") ||
        updateGPUFailed(cudaEventSynchronize(stop), "cudaEventSynchronize") ||
        updateGPUFailed(cudaEventElapsedTime(&milli, start, stop), "cudaEventElapsedTime"))
        goto cleanup;

    printf("Time Elapsed is %2.6f seconds\n",milli/1000);

    /* Only publish the result once every step and copy has succeeded. */
    for (i = 0; i < nx; i++)
        for (j = 0; j < ny; j++)
            arr1[i][j] = s[(size_t)i*ny + j];
    result = 1.0f;

cleanup:
    cudaFree(ad);                /* cudaFree(NULL) is a no-op */
    cudaFree(bd);
    if (start != NULL)
        cudaEventDestroy(start);
    if (stop != NULL)
        cudaEventDestroy(stop);
    free(s);
    return result;
}