9
9
10
10
#include " Main.h"
11
11
#include " Timer.h"
12
+ #include " WOCL.h"
12
13
13
14
void mpi ( info_t *info ) {
14
15
int rank, numOfProcesses;
@@ -98,7 +99,6 @@ void mpiOpenCL( info_t *info ) {
98
99
#pragma region MPI Init
99
100
int rank, numOfProcesses;
100
101
Timer time ;
101
- cl_event event;
102
102
MPI_Comm_rank ( MPI_COMM_WORLD, &rank );
103
103
MPI_Comm_size ( MPI_COMM_WORLD, &numOfProcesses );
104
104
@@ -121,87 +121,54 @@ void mpiOpenCL( info_t *info ) {
121
121
int myStart = disps[rank] / 4 ;
122
122
#pragma endregion
123
123
124
- #pragma region OpenCL Inicializacija
125
- cl_int ret;
126
-
127
- // Platforma in naprava
128
- cl_platform_id platform_id = nullptr ;
129
- ret = clGetPlatformIDs ( 1 , &platform_id, NULL );
130
- cl_device_id device_id;
131
- ret = clGetDeviceIDs ( platform_id, info->deviceType , 1 , &device_id, NULL );
132
- printf ( " [%d] " , rank); PrintDeviceInfo ( &platform_id, &device_id );
133
-
134
- cl_context context = clCreateContext ( NULL , 1 , &device_id, NULL , NULL , &ret ); // Kontekst
135
- cl_command_queue command_queue = clCreateCommandQueue ( context, device_id, 0 , &ret ); // Ukazna vrsta
136
- #pragma endregion
137
-
138
- #pragma region OpenCL Delitev dela
139
- size_t local_item_size = info->local_item_size ;
140
- size_t num_groups = ((myN - 1 ) / local_item_size + 1 );
141
- size_t global_item_size = num_groups*local_item_size;
142
- printf ( " [%d] Delitev dela: local: %d | num_groups: %d | global: %d (myStart:%d, myN: %d)\n " ,
143
- rank, local_item_size, num_groups, global_item_size, myStart, myN );
144
- #pragma endregion
124
+ WOCL cl = WOCL ( info->deviceType );
125
+ cl.SetWorkSize ( info->local_item_size , WOCL::CalculateNumOfGroups ( info->local_item_size , myN ), 0 );
145
126
146
127
// HOST alokacija ("float4", .w bo masa)
147
128
float *Coord = (float *) malloc ( 4 * sizeof (float ) * info->n );
148
129
float *newCoord = (float *) malloc ( 4 * sizeof (float ) * myN );
149
- float *V = (float *) calloc ( sizeof (float ), 4 * myN );
130
+ float *V = (float *) calloc ( 4 * sizeof (float ), myN );
150
131
if ( rank == 0 )
151
132
generateCoordinatesFloat4 ( Coord, info );
152
133
MPI_Bcast ( Coord, 4 * info->n , MPI_FLOAT, 0 , MPI_COMM_WORLD );
153
134
154
135
if ( rank == 0 )
155
- time .Tic ();
136
+ time .TicSimple ();
137
+
156
138
// Device alokacija
157
- cl_mem devCoord = clCreateBuffer ( context, CL_MEM_READ_WRITE, info->n *sizeof (cl_float4), NULL , &ret );
158
- cl_mem devCoordNew = clCreateBuffer ( context, CL_MEM_READ_WRITE, myN*sizeof (cl_float4), NULL , &ret );
159
- cl_mem devV = clCreateBuffer ( context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, myN*sizeof (cl_float4), V, &ret );
139
+ cl_mem devCoord = cl. CreateBuffer ( info->n *sizeof (cl_float4), CL_MEM_READ_WRITE, NULL );
140
+ cl_mem devCoordNew = cl. CreateBuffer ( myN*sizeof (cl_float4), CL_MEM_READ_WRITE, NULL );
141
+ cl_mem devV = cl. CreateBuffer ( myN*sizeof (cl_float4), CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, V );
160
142
161
143
// Priprava programa
162
- cl_program program;
163
- BuildKernel ( &program, &context, &device_id, " res/kernelCombo.cl" );
144
+ cl.CreateAndBuildKernel ( " res/kernelCombo.cl" , " kernelCombo" );
164
145
165
- // priprava šcepca
166
- cl_kernel krnl = clCreateKernel ( program, " kernelCombo" , &ret );
167
- ret = clSetKernelArg ( krnl, 0 , sizeof (cl_mem), (void *) &devCoord );
168
- ret |= clSetKernelArg ( krnl, 1 , sizeof (cl_mem), (void *) &devCoordNew );
169
- ret |= clSetKernelArg ( krnl, 2 , sizeof (cl_mem), (void *) &devV );
170
- ret |= clSetKernelArg ( krnl, 3 , sizeof (cl_int), (void *) &myStart );
171
- ret |= clSetKernelArg ( krnl, 4 , sizeof (cl_int), (void *) &(info->n ) );
172
- ret |= clSetKernelArg ( krnl, 5 , sizeof (cl_float), (void *) &(info->eps ) );
173
- ret |= clSetKernelArg ( krnl, 6 , sizeof (cl_float), (void *) &(info->kappa ) );
174
- ret |= clSetKernelArg ( krnl, 7 , sizeof (cl_float), (void *) &(info->dt ) );
146
+ cl.SetKernelArgument <cl_mem>( 0 , &devCoord );
147
+ cl.SetKernelArgument <cl_mem>( 1 , &devCoordNew );
148
+ cl.SetKernelArgument <cl_mem>( 2 , &devV );
149
+ cl.SetKernelArgument <cl_int>( 3 , &myStart );
150
+ cl.SetKernelArgument <cl_int>( 4 , &(info->n ) );
151
+ cl.SetKernelArgument <cl_int>( 5 , &myN );
152
+ cl.SetKernelArgument <cl_float>( 6 , &(info->eps ) );
153
+ cl.SetKernelArgument <cl_float>( 7 , &(info->kappa ) );
154
+ cl.SetKernelArgument <cl_float>( 8 , &(info->dt ) );
175
155
176
156
177
157
// zagon ščepca
178
158
for ( int step = 0 ; step < info->steps ; step++ ) {
179
- ret = clEnqueueWriteBuffer ( command_queue, devCoord, CL_TRUE, 0 , info->n *sizeof (cl_float4), Coord, 0 , NULL , NULL );
180
-
181
- ret |= clEnqueueNDRangeKernel ( command_queue, krnl, 1 , NULL , &global_item_size, &local_item_size, 0 , NULL , NULL );
182
-
183
- // Prenos rezultatov na gostitelja in posiljanje ostalim
184
- ret = clEnqueueReadBuffer ( command_queue, devCoordNew, CL_TRUE, 0 , myN * sizeof (cl_float4), newCoord, 0 , NULL , &event );
185
- clWaitForEvents ( 1 , &event );
159
+ cl.CopyHostToDevice ( &devCoord, Coord, info->n *sizeof (cl_float4) );
160
+ cl.ExecuteKernel ();
161
+ cl.CopyDeviceToHost ( &devCoordNew, newCoord, myN * sizeof (cl_float4) );
162
+ cl.Finish ();
186
163
MPI_Allgatherv ( newCoord, counts[rank], MPI_FLOAT, Coord, counts, disps, MPI_FLOAT, MPI_COMM_WORLD );
187
164
}
188
165
189
166
if ( rank == 0 ) {
190
- printf ( " Cas izvajanja %lf \n " , time .Toc () );
167
+ printf ( " Time: %.3lf \n " , time .TocSimple () );
191
168
checkResultsFloat4 ( Coord, info->n );
192
169
}
193
170
194
171
#pragma region Cleanup
195
- ret = clFlush ( command_queue );
196
- ret = clFinish ( command_queue );
197
- ret = clReleaseKernel ( krnl );
198
- ret = clReleaseProgram ( program );
199
- ret = clReleaseMemObject ( devV );
200
- ret = clReleaseMemObject ( devCoord );
201
- ret = clReleaseMemObject ( devCoordNew );
202
- ret = clReleaseCommandQueue ( command_queue );
203
- ret = clReleaseContext ( context );
204
-
205
172
free ( V );
206
173
free ( counts );
207
174
free ( disps );
0 commit comments