Skip to content

Commit d5f0336

Browse files
committed
updated combo to WOCL
1 parent 4affe0b commit d5f0336

File tree

5 files changed

+35
-62
lines changed

5 files changed

+35
-62
lines changed

NBody OpenCL/Main.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ void printHelp() {
2929

3030
int main( int argc, char **argv ) {
3131
info_t info;
32-
info.n = 1000;
32+
info.n = 10000;
3333
info.steps = 100;
3434
info.sphereRadius = 10; //10
3535
info.kappa = 1;
@@ -41,13 +41,13 @@ int main( int argc, char **argv ) {
4141
info.local_item_size = 256;
4242
info.randFunc = SPHERE_2_POLES;
4343

44-
bool doMPI = true;
44+
bool doMPI = false;
4545
bool doCPU = false;
4646
bool doCPUOpt = false;
4747
bool doGPU1 = false;
48-
bool doGPU2 = false;
48+
bool doGPU2 = true;
4949
bool doGPU3 = false;
50-
bool doCombo = false;
50+
bool doCombo = true;
5151
bool doGL = false;
5252

5353
#pragma region Parse Arguments

NBody OpenCL/WOCL.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,10 @@ void WOCL::CopyDeviceToHost( cl_mem *device, void *host, size_t size ) {
151151
ret = clEnqueueReadBuffer( m_queue, *device, CL_TRUE, 0, size, host, 0, NULL, NULL );
152152
CheckForError( ret, "clEnqueueReadBuffer" );
153153
}
154+
// Copies `size` bytes from host memory into an OpenCL device buffer.
//
// Counterpart of CopyDeviceToHost. The write is enqueued on m_queue with
// blocking_write = CL_TRUE and offset 0, so `host` may be reused or freed
// as soon as this call returns.
//
// device  pointer to the destination buffer handle
// host    source host memory; at least `size` bytes are read from it
// size    number of bytes to transfer
//
// The OpenCL status code is stored in the member `ret` and handed to
// CheckForError, labelled with the API call that actually produced it.
void WOCL::CopyHostToDevice( cl_mem *device, void *host, size_t size ) {
	ret = clEnqueueWriteBuffer( m_queue, *device, CL_TRUE, 0, size, host, 0, NULL, NULL );
	// Was labelled "clEnqueueReadBuffer" (copy-paste from CopyDeviceToHost),
	// which misattributed write failures to a read.
	CheckForError( ret, "clEnqueueWriteBuffer" );
}
154158
void WOCL::SetAndAllocKernelArgument( int idx, size_t size ) {
155159
ret = clSetKernelArg( m_kernel, idx, size, NULL);
156160
CheckForError( ret, "clSetKernelArg (alloc)" );

NBody OpenCL/WOCL.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ class WOCL {
100100
cl_mem CreateBuffer( size_t size, cl_mem_flags flags, void *hostBuffer );
101101
cl_mem CreateBufferFromGLBuffer( cl_mem_flags flags, GLuint buffer);
102102
void CopyDeviceToHost( cl_mem *device, void *host, size_t size );
103+
void CopyHostToDevice( cl_mem *device, void *host, size_t size );
103104
void ExecuteKernel();
104105
void AcquireObjectsFromGLAndFinish( cl_uint num, cl_mem *objects );
105106
void ReleaseObjectsToGLAndFinish( cl_uint num, cl_mem *objects );

NBody OpenCL/mpi.cpp

Lines changed: 24 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
#include "Main.h"
1111
#include "Timer.h"
12+
#include "WOCL.h"
1213

1314
void mpi( info_t *info ) {
1415
int rank, numOfProcesses;
@@ -98,7 +99,6 @@ void mpiOpenCL( info_t *info ) {
9899
#pragma region MPI Init
99100
int rank, numOfProcesses;
100101
Timer time;
101-
cl_event event;
102102
MPI_Comm_rank( MPI_COMM_WORLD, &rank );
103103
MPI_Comm_size( MPI_COMM_WORLD, &numOfProcesses );
104104

@@ -121,87 +121,54 @@ void mpiOpenCL( info_t *info ) {
121121
int myStart = disps[rank] / 4;
122122
#pragma endregion
123123

124-
#pragma region OpenCL Inicializacija
125-
cl_int ret;
126-
127-
// Platforma in naprava
128-
cl_platform_id platform_id = nullptr;
129-
ret = clGetPlatformIDs( 1, &platform_id, NULL );
130-
cl_device_id device_id;
131-
ret = clGetDeviceIDs( platform_id, info->deviceType, 1, &device_id, NULL );
132-
printf( "[%d] ", rank); PrintDeviceInfo( &platform_id, &device_id );
133-
134-
cl_context context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret ); // Kontekst
135-
cl_command_queue command_queue = clCreateCommandQueue( context, device_id, 0, &ret ); // Ukazna vrsta
136-
#pragma endregion
137-
138-
#pragma region OpenCL Delitev dela
139-
size_t local_item_size = info->local_item_size;
140-
size_t num_groups = ((myN - 1) / local_item_size + 1);
141-
size_t global_item_size = num_groups*local_item_size;
142-
printf( "[%d] Delitev dela: local: %d | num_groups: %d | global: %d (myStart:%d, myN: %d)\n",
143-
rank, local_item_size, num_groups, global_item_size, myStart, myN );
144-
#pragma endregion
124+
WOCL cl = WOCL( info->deviceType );
125+
cl.SetWorkSize( info->local_item_size, WOCL::CalculateNumOfGroups( info->local_item_size, myN ), 0 );
145126

146127
// HOST alokacija ("float4", .w bo masa)
147128
float *Coord = (float *) malloc( 4 * sizeof(float) * info->n );
148129
float *newCoord = (float *) malloc( 4 * sizeof(float) * myN );
149-
float *V = (float *) calloc( sizeof(float), 4 * myN );
130+
float *V = (float *) calloc( 4 *sizeof(float), myN );
150131
if( rank == 0 )
151132
generateCoordinatesFloat4( Coord, info );
152133
MPI_Bcast( Coord, 4 * info->n, MPI_FLOAT, 0, MPI_COMM_WORLD );
153134

154135
if( rank == 0 )
155-
time.Tic();
136+
time.TicSimple();
137+
156138
// Device alokacija
157-
cl_mem devCoord = clCreateBuffer( context, CL_MEM_READ_WRITE, info->n*sizeof(cl_float4), NULL, &ret );
158-
cl_mem devCoordNew = clCreateBuffer( context, CL_MEM_READ_WRITE, myN*sizeof(cl_float4), NULL, &ret );
159-
cl_mem devV = clCreateBuffer( context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, myN*sizeof(cl_float4), V, &ret );
139+
cl_mem devCoord = cl.CreateBuffer( info->n*sizeof(cl_float4), CL_MEM_READ_WRITE, NULL );
140+
cl_mem devCoordNew = cl.CreateBuffer( myN*sizeof(cl_float4), CL_MEM_READ_WRITE, NULL );
141+
cl_mem devV = cl.CreateBuffer( myN*sizeof(cl_float4), CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, V );
160142

161143
// Priprava programa
162-
cl_program program;
163-
BuildKernel( &program, &context, &device_id, "res/kernelCombo.cl" );
144+
cl.CreateAndBuildKernel( "res/kernelCombo.cl", "kernelCombo" );
164145

165-
// priprava šcepca
166-
cl_kernel krnl = clCreateKernel( program, "kernelCombo", &ret );
167-
ret = clSetKernelArg( krnl, 0, sizeof(cl_mem), (void *) &devCoord );
168-
ret |= clSetKernelArg( krnl, 1, sizeof(cl_mem), (void *) &devCoordNew );
169-
ret |= clSetKernelArg( krnl, 2, sizeof(cl_mem), (void *) &devV );
170-
ret |= clSetKernelArg( krnl, 3, sizeof(cl_int), (void *) &myStart );
171-
ret |= clSetKernelArg( krnl, 4, sizeof(cl_int), (void *) &(info->n) );
172-
ret |= clSetKernelArg( krnl, 5, sizeof(cl_float), (void *) &(info->eps) );
173-
ret |= clSetKernelArg( krnl, 6, sizeof(cl_float), (void *) &(info->kappa) );
174-
ret |= clSetKernelArg( krnl, 7, sizeof(cl_float), (void *) &(info->dt) );
146+
cl.SetKernelArgument<cl_mem>( 0, &devCoord );
147+
cl.SetKernelArgument<cl_mem>( 1, &devCoordNew );
148+
cl.SetKernelArgument<cl_mem>( 2, &devV );
149+
cl.SetKernelArgument<cl_int>( 3, &myStart );
150+
cl.SetKernelArgument<cl_int>( 4, &(info->n) );
151+
cl.SetKernelArgument<cl_int>( 5, &myN );
152+
cl.SetKernelArgument<cl_float>( 6, &(info->eps) );
153+
cl.SetKernelArgument<cl_float>( 7, &(info->kappa) );
154+
cl.SetKernelArgument<cl_float>( 8, &(info->dt) );
175155

176156

177157
// zagon ščepca
178158
for( int step = 0; step < info->steps; step++ ) {
179-
ret = clEnqueueWriteBuffer( command_queue, devCoord, CL_TRUE, 0, info->n*sizeof(cl_float4), Coord, 0, NULL, NULL );
180-
181-
ret |= clEnqueueNDRangeKernel( command_queue, krnl, 1, NULL, &global_item_size, &local_item_size, 0, NULL, NULL );
182-
183-
// Prenos rezultatov na gostitelja in posiljanje ostalim
184-
ret = clEnqueueReadBuffer( command_queue, devCoordNew, CL_TRUE, 0, myN * sizeof(cl_float4), newCoord, 0, NULL, &event );
185-
clWaitForEvents( 1, &event );
159+
cl.CopyHostToDevice( &devCoord, Coord, info->n*sizeof(cl_float4) );
160+
cl.ExecuteKernel();
161+
cl.CopyDeviceToHost( &devCoordNew, newCoord, myN * sizeof(cl_float4) );
162+
cl.Finish();
186163
MPI_Allgatherv( newCoord, counts[rank], MPI_FLOAT, Coord, counts, disps, MPI_FLOAT, MPI_COMM_WORLD );
187164
}
188165

189166
if( rank == 0 ) {
190-
printf( "Cas izvajanja %lf\n", time.Toc() );
167+
printf( "Time: %.3lf\n", time.TocSimple() );
191168
checkResultsFloat4( Coord, info->n );
192169
}
193170

194171
#pragma region Cleanup
195-
ret = clFlush( command_queue );
196-
ret = clFinish( command_queue );
197-
ret = clReleaseKernel( krnl );
198-
ret = clReleaseProgram( program );
199-
ret = clReleaseMemObject( devV );
200-
ret = clReleaseMemObject( devCoord );
201-
ret = clReleaseMemObject( devCoordNew );
202-
ret = clReleaseCommandQueue( command_queue );
203-
ret = clReleaseContext( context );
204-
205172
free( V );
206173
free( counts );
207174
free( disps );

NBody OpenCL/res/kernelCombo.cl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,15 @@ kernel void kernelCombo( global float4 *Coord,
33
global float4 *V,
44
int m,
55
int n,
6+
int myN,
67
float eps,
78
float kappa,
89
float dt
910
) {
1011
int id = get_global_id( 0 );
1112
int idGlobal = id + m;
1213

13-
if( id < n ) {
14+
if( id < myN ) {
1415
float4 myBody = Coord[idGlobal];
1516
float4 a = (float4) (0, 0, 0, 0);
1617

0 commit comments

Comments
 (0)