| @@ -0,0 +1,155 @@ | ||
| #include "include/cl_wrapper.hpp" | ||
| #include "include/oclUtils.h" | ||
|
|
||
| void CLSetup::init(char* deviceName) | ||
| { | ||
| getPlatformID(); | ||
| getDeviceID(deviceName); | ||
| getContextnQueue(); | ||
| } | ||
|
|
||
| void CLSetup::getPlatformID() | ||
| { | ||
|
|
||
| cl_uint num_of_platforms = 0; | ||
| // get total number of available platforms: | ||
| cl_int err = CL_SUCCESS; | ||
| err = clGetPlatformIDs(0, 0, &num_of_platforms); | ||
|
|
||
| cl_platform_id* platforms = new cl_platform_id[num_of_platforms]; | ||
| // get IDs for all platforms: | ||
| err = clGetPlatformIDs(num_of_platforms, platforms, 0); | ||
|
|
||
| cl_uint selected_platform_index = num_of_platforms; | ||
|
|
||
| for (cl_uint i = 0; i < num_of_platforms; ++i) | ||
| { | ||
| // Get the length for the i-th platform name | ||
| size_t platform_name_length = 0; | ||
| err = clGetPlatformInfo( | ||
| platforms[i], | ||
| CL_PLATFORM_NAME, | ||
| 0, | ||
| 0, | ||
| &platform_name_length | ||
| ); | ||
|
|
||
| // Get the name itself for the i-th platform | ||
| char* platform_name = new char[platform_name_length]; | ||
| err = clGetPlatformInfo( | ||
| platforms[i], | ||
| CL_PLATFORM_NAME, | ||
| platform_name_length, | ||
| platform_name, | ||
| 0 | ||
| ); | ||
|
|
||
| // decide if this i-th platform is what we are looking for | ||
| // we select the first one matched skipping the next one if any | ||
| if (strstr(platform_name, /*"AMD"*/ "NVIDIA" /*"Intel(R) OpenCL"*/) && | ||
| selected_platform_index == num_of_platforms) | ||
| { | ||
| selected_platform_index = i; | ||
| _platformID = platforms[i]; | ||
| // do not stop here, just see all available platforms | ||
| } | ||
|
|
||
| delete[] platform_name; | ||
| } | ||
|
|
||
|
|
||
| /// !TODO: Multiple Platforms | ||
| // cl_platform_id* _platformID; | ||
| // _status = clGetPlatformIDs(NUMBER_OF_PLATFORMS, NULL, &_numPlatforms); | ||
| // DEBUG_CL(_status); | ||
| // _platformID = (cl_platform_id *)malloc(sizeof(cl_platform_id) * _numPlatforms); | ||
| // _status =clGetPlatformIDs(_numPlatforms, _platformID, NULL); | ||
| // DEBUG_CL(_status); | ||
| // _platformIDsVector.assign(_platformID[0], _platformID[_numPlatforms]); | ||
| } | ||
|
|
||
| void CLSetup::getDeviceID(char *devName) | ||
| { | ||
| /// !TODO: For Multiple Devices | ||
| _status = clGetDeviceIDs(_platformID,CL_DEVICE_TYPE_GPU, 1, NULL, &_numDevices); | ||
| DEBUG_CL(_status); | ||
| std::cout<<"CL_COMPUTE DEVICES: "<<_numDevices<<std::endl; | ||
| _status = clGetDeviceIDs(_platformID, CL_DEVICE_TYPE_GPU, 1, &_deviceID, NULL); | ||
| DEBUG_CL(_status); | ||
| std::cout<<"CL_DEVICE_ID: "<<_deviceID<<std::endl; | ||
|
|
||
| char device_string[1024]; | ||
| clGetDeviceInfo(_deviceID, CL_DEVICE_NAME, sizeof(device_string), &device_string, NULL); | ||
| strcpy(devName, device_string); | ||
|
|
||
| // Getting some information about the device | ||
| // Getting some information about the device | ||
|
|
||
| oclPrintDevInfo(LOGCONSOLE, _deviceID); | ||
| clGetDeviceInfo(_deviceID, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), &_maxComputeUnits, NULL); | ||
| std::cout<<"CL_DEVICE_MAX_COMPUTE_UNITS: "<<_maxComputeUnits<<std::endl; | ||
| clGetDeviceInfo(_deviceID, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &_maxWorkGroupSize, NULL); | ||
| clGetDeviceInfo(_deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &_maxMemAllocSize, NULL); | ||
| clGetDeviceInfo(_deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(cl_ulong), &_globalMemSize, NULL); | ||
| clGetDeviceInfo(_deviceID, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(cl_ulong), &_constMemSize, NULL); | ||
| clGetDeviceInfo(_deviceID, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong), &_localMemSize, NULL); | ||
| ///!TODO:Add get KernelInfo APIs | ||
|
|
||
| } | ||
|
|
||
| void CLSetup::getContextnQueue() | ||
| { | ||
| cl_command_queue_properties queueProps = NULL;// CL_QUEUE_PROFILING_ENABLE; | ||
| _context = clCreateContext(NULL, 1, &_deviceID, NULL, NULL, &_status); | ||
| DEBUG_CL(_status); | ||
| _queue = clCreateCommandQueue(_context, _deviceID, queueProps, &_status); | ||
| DEBUG_CL(_status); | ||
| } | ||
|
|
||
| /// | ||
| /// \brief CLSetup::createProgram | ||
| /// \param kernelFilePath | ||
| /// \return | ||
| /// | ||
| Program *CLSetup::createProgram(std::vector<std::string> kernelFilePath) | ||
| //Program *CLSetup::createProgram(std::string& kernelFilePath) | ||
| { | ||
| ///!TODO: Add support for char** along with string | ||
| Program* tmp = new Program(kernelFilePath, &_context, &_queue, | ||
| &_deviceID); | ||
| return tmp; | ||
| } | ||
|
|
||
| OCLBuffer* CLSetup::createBuffer(const size_t size, const cl_mem_flags flags, | ||
| void *hostMem) | ||
| { | ||
| cl_mem buff = clCreateBuffer(_context,flags, size, hostMem ,&_status); | ||
| if(_status == CL_SUCCESS) | ||
| { | ||
| OCLBuffer* ret = new OCLBuffer(buff, &_queue); | ||
| return ret; | ||
| } | ||
| DEBUG_CL(_status); | ||
| if(_status != CL_SUCCESS) | ||
| printf("createBuffer error : %s", getCLErrorString(_status)); | ||
| return NULL; //TODO: Return custom status value | ||
| } | ||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
| @@ -0,0 +1,235 @@ | ||
| #include "include/clutils.h" | ||
| #include <stdlib.h> | ||
| #include <stdio.h> | ||
| #include <CL/opencl.h> | ||
|
|
||
| char* utils_cl_enum_to_string (cl_int value) | ||
| { | ||
| switch (value) | ||
| { | ||
| /* cl_channel_order */ | ||
| case 0x10B0: return "CL_R"; | ||
| case 0x10B1: return "CL_A"; | ||
| case 0x10B2: return "CL_RG"; | ||
| case 0x10B3: return "CL_RA"; | ||
| case 0x10B4: return "CL_RGB"; | ||
| case 0x10B5: return "CL_RGBA"; | ||
| case 0x10B6: return "CL_BGRA"; | ||
| case 0x10B7: return "CL_ARGB"; | ||
| case 0x10B8: return "CL_INTENSITY"; | ||
| case 0x10B9: return "CL_LUMINANCE"; | ||
| case 0x10BA: return "CL_Rx"; | ||
| case 0x10BB: return "CL_RGx"; | ||
| case 0x10BC: return "CL_RGBx"; | ||
|
|
||
| /* cl_channel_type */ | ||
| case 0x10D0: return "CL_SNORM_INT8"; | ||
| case 0x10D1: return "CL_SNORM_INT16"; | ||
| case 0x10D2: return "CL_UNORM_INT8"; | ||
| case 0x10D3: return "CL_UNORM_INT16"; | ||
| case 0x10D4: return "CL_UNORM_SHORT_565"; | ||
| case 0x10D5: return "CL_UNORM_SHORT_555"; | ||
| case 0x10D6: return "CL_UNORM_INT_101010"; | ||
| case 0x10D7: return "CL_SIGNED_INT8"; | ||
| case 0x10D8: return "CL_SIGNED_INT16"; | ||
| case 0x10D9: return "CL_SIGNED_INT32"; | ||
| case 0x10DA: return "CL_UNSIGNED_INT8"; | ||
| case 0x10DB: return "CL_UNSIGNED_INT16"; | ||
| case 0x10DC: return "CL_UNSIGNED_INT32"; | ||
| case 0x10DD: return "CL_HALF_FLOAT"; | ||
| case 0x10DE: return "CL_FLOAT"; | ||
| default: return "Unknown value!"; | ||
| } | ||
| } | ||
|
|
||
| char* utils_get_ocl_error (cl_int err_code) | ||
| { | ||
| switch (err_code) | ||
| { | ||
| case CL_SUCCESS: return "Success!"; | ||
| case CL_DEVICE_NOT_FOUND: return "Device not found."; | ||
| case CL_DEVICE_NOT_AVAILABLE: return "Device not available"; | ||
| case CL_COMPILER_NOT_AVAILABLE: return "Compiler not available"; | ||
| case CL_MEM_OBJECT_ALLOCATION_FAILURE: return "Memory object allocation failure"; | ||
| case CL_OUT_OF_RESOURCES: return "Out of resources"; | ||
| case CL_OUT_OF_HOST_MEMORY: return "Out of host memory"; | ||
| case CL_PROFILING_INFO_NOT_AVAILABLE: return "Profiling information not available"; | ||
| case CL_MEM_COPY_OVERLAP: return "Memory copy overlap"; | ||
| case CL_IMAGE_FORMAT_MISMATCH: return "Image format mismatch"; | ||
| case CL_IMAGE_FORMAT_NOT_SUPPORTED: return "Image format not supported"; | ||
| case CL_BUILD_PROGRAM_FAILURE: return "Program build failure"; | ||
| case CL_MAP_FAILURE: return "Map failure"; | ||
| case CL_INVALID_VALUE: return "Invalid value"; | ||
| case CL_INVALID_DEVICE_TYPE: return "Invalid device type"; | ||
| case CL_INVALID_PLATFORM: return "Invalid platform"; | ||
| case CL_INVALID_DEVICE: return "Invalid device"; | ||
| case CL_INVALID_CONTEXT: return "Invalid context"; | ||
| case CL_INVALID_QUEUE_PROPERTIES: return "Invalid queue properties"; | ||
| case CL_INVALID_COMMAND_QUEUE: return "Invalid command queue"; | ||
| case CL_INVALID_HOST_PTR: return "Invalid host pointer"; | ||
| case CL_INVALID_MEM_OBJECT: return "Invalid memory object"; | ||
| case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: return "Invalid image format descriptor"; | ||
| case CL_INVALID_IMAGE_SIZE: return "Invalid image size"; | ||
| case CL_INVALID_SAMPLER: return "Invalid sampler"; | ||
| case CL_INVALID_BINARY: return "Invalid binary"; | ||
| case CL_INVALID_BUILD_OPTIONS: return "Invalid build options"; | ||
| case CL_INVALID_PROGRAM: return "Invalid program"; | ||
| case CL_INVALID_PROGRAM_EXECUTABLE: return "Invalid program executable"; | ||
| case CL_INVALID_KERNEL_NAME: return "Invalid kernel name"; | ||
| case CL_INVALID_KERNEL_DEFINITION: return "Invalid kernel definition"; | ||
| case CL_INVALID_KERNEL: return "Invalid kernel"; | ||
| case CL_INVALID_ARG_INDEX: return "Invalid argument index"; | ||
| case CL_INVALID_ARG_VALUE: return "Invalid argument value"; | ||
| case CL_INVALID_ARG_SIZE: return "Invalid argument size"; | ||
| case CL_INVALID_KERNEL_ARGS: return "Invalid kernel arguments"; | ||
| case CL_INVALID_WORK_DIMENSION: return "Invalid work dimension"; | ||
| case CL_INVALID_WORK_GROUP_SIZE: return "Invalid work group size"; | ||
| case CL_INVALID_WORK_ITEM_SIZE: return "Invalid work item size"; | ||
| case CL_INVALID_GLOBAL_OFFSET: return "Invalid global offset"; | ||
| case CL_INVALID_EVENT_WAIT_LIST: return "Invalid event wait list"; | ||
| case CL_INVALID_EVENT: return "Invalid event"; | ||
| case CL_INVALID_OPERATION: return "Invalid operation"; | ||
| case CL_INVALID_GL_OBJECT: return "Invalid OpenGL object"; | ||
| case CL_INVALID_BUFFER_SIZE: return "Invalid buffer size"; | ||
| case CL_INVALID_MIP_LEVEL: return "Invalid mip-map level"; | ||
| default: return "Unknown error"; | ||
| } | ||
| } | ||
|
|
||
| char* utils_read_file (const char* filename) | ||
| { | ||
| // locals | ||
| FILE* f = NULL; | ||
| size_t file_size; | ||
|
|
||
| f = fopen (filename, "r"); | ||
| if (f == NULL) | ||
| { | ||
| return NULL; | ||
| } | ||
|
|
||
| // get the length | ||
| fseek (f, 0, SEEK_END); | ||
| file_size = ftell (f); | ||
| fseek (f, 0, SEEK_SET); | ||
|
|
||
| // allocate a buffer and fill it | ||
| char* contents = (char *)malloc (file_size + 1); | ||
| if (fread (contents, file_size, sizeof (char), f) != 1) | ||
| { | ||
| fclose (f); | ||
| free (contents); | ||
| return NULL; | ||
| } | ||
|
|
||
| fclose (f); | ||
| contents[file_size] = '\0'; | ||
|
|
||
| return contents; | ||
| } | ||
|
|
||
| int utils_get_platform_and_device (cl_device_type dev_type, | ||
| cl_platform_id *platform, | ||
| cl_device_id *device_id, | ||
| int just_print) | ||
| { | ||
| cl_int err_code; | ||
| cl_uint num_plat; | ||
| cl_int p = clGetPlatformIDs (0, NULL, &num_plat); | ||
|
|
||
| if (just_print) | ||
| { | ||
| if (num_plat == 1) printf ("There is 1 platform available\n"); | ||
| else printf ("There are %u platforms available\n", num_plat); | ||
| } | ||
| if (num_plat <= 0) return 0; | ||
|
|
||
| cl_platform_id *ids = (cl_platform_id *)malloc (sizeof (cl_platform_id) * num_plat); | ||
| clGetPlatformIDs (num_plat, ids, NULL); | ||
|
|
||
| for (unsigned int i=0; i<num_plat; i++) | ||
| { | ||
| if (just_print) utils_print_platform_info (ids[i]); | ||
|
|
||
| cl_uint num_devs; | ||
| err_code = clGetDeviceIDs (ids[i], dev_type, 0, NULL, &num_devs); | ||
| if (err_code != CL_SUCCESS || num_devs <= 0) continue; | ||
|
|
||
| cl_device_id *dev_ids = (cl_device_id *)malloc (sizeof (cl_device_id) * num_devs); | ||
| clGetDeviceIDs (ids[i], dev_type, num_devs, dev_ids, NULL); | ||
| for (unsigned int j=0; j<num_devs; j++) | ||
| { | ||
| if (just_print) utils_print_device_info (dev_ids[j]); | ||
| } | ||
|
|
||
| if (!just_print) | ||
| { | ||
| *platform = ids[i]; | ||
| *device_id = dev_ids[0]; | ||
| return 1; | ||
| } | ||
| } | ||
|
|
||
| return 0; | ||
| } | ||
|
|
||
| void utils_print_platform_info (cl_platform_id platform) | ||
| { | ||
| char buffer[8*1024]; | ||
|
|
||
| clGetPlatformInfo (platform, CL_PLATFORM_NAME, sizeof (buffer), buffer, NULL); | ||
| printf ("PLATFORM_NAME: %s\n", buffer); | ||
| clGetPlatformInfo (platform, CL_PLATFORM_VERSION, sizeof (buffer), buffer, NULL); | ||
| printf (" VERSION: %s\n", buffer); | ||
| clGetPlatformInfo (platform, CL_PLATFORM_VENDOR, sizeof (buffer), buffer, NULL); | ||
| printf (" VENDOR: %s\n", buffer); | ||
| clGetPlatformInfo (platform, CL_PLATFORM_PROFILE, sizeof (buffer), buffer, NULL); | ||
| printf (" PROFILE: %s\n", buffer); | ||
| } | ||
|
|
||
| void utils_print_device_info (cl_device_id dev_id) | ||
| { | ||
| cl_ulong ul_prop; | ||
| cl_uint ui_prop; | ||
| size_t size_prop; | ||
| char buffer[8*1024]; | ||
|
|
||
| clGetDeviceInfo (dev_id, CL_DEVICE_NAME, sizeof (buffer), buffer, NULL); | ||
| printf (" DEVICE: %s\n", buffer); | ||
| clGetDeviceInfo (dev_id, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof (ui_prop), &ui_prop, NULL); | ||
| printf (" MAX COMPUTE UNITS: %u\n", ui_prop); | ||
| clGetDeviceInfo (dev_id, CL_DEVICE_VERSION, sizeof (buffer), buffer, NULL); | ||
| printf (" DEVICE VERSION: %s\n", buffer); | ||
| clGetDeviceInfo (dev_id, CL_DRIVER_VERSION, sizeof (buffer), buffer, NULL); | ||
| printf (" DRIVER VERSION: %s\n", buffer); | ||
|
|
||
| clGetDeviceInfo (dev_id, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof (ul_prop), &ul_prop, NULL); | ||
| printf (" GLOBAL MEM SIZE: %lu\n", (unsigned long)ul_prop); | ||
| clGetDeviceInfo (dev_id, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, sizeof (ul_prop), &ul_prop, NULL); | ||
| printf (" GLOBAL MEM CACHE SIZE: %lu\n", (unsigned long)ul_prop); | ||
| clGetDeviceInfo (dev_id, CL_DEVICE_LOCAL_MEM_SIZE, sizeof (ul_prop), &ul_prop, NULL); | ||
| printf (" LOCAL MEM SIZE: %lu\n", (unsigned long)ul_prop); | ||
| clGetDeviceInfo (dev_id, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof (ui_prop), &ui_prop, NULL); | ||
| printf (" MAX CLOCK FREQUENCY: %u\n", ui_prop); | ||
| clGetDeviceInfo (dev_id, CL_DEVICE_PROFILING_TIMER_RESOLUTION, sizeof (size_prop), &size_prop, NULL); | ||
| printf (" TIMER RESOLUTION: %lu\n", (unsigned long)size_prop); | ||
| } | ||
|
|
||
| cl_ulong utils_get_event_time (cl_event event, cl_profiling_info param) | ||
| { | ||
| cl_ulong result; | ||
| cl_int err_code; | ||
|
|
||
| err_code = clGetEventProfilingInfo (event, param, | ||
| sizeof (result), &result, NULL); | ||
| CHECK_OCL_ERROR (clGetEventProfilingInfo, err_code); | ||
|
|
||
| return result; | ||
| } | ||
|
|
||
| cl_ulong utils_get_event_execution_time (cl_event event) | ||
| { | ||
| return utils_get_event_time (event, CL_PROFILING_COMMAND_END) - | ||
| utils_get_event_time (event, CL_PROFILING_COMMAND_START); | ||
| } | ||
|
|
| @@ -0,0 +1,16 @@ | ||
| #include "include/helpers.h" | ||
|
|
||
| std::vector<std::string> &split(const std::string &s, char delim, std::vector<std::string> &elems) { | ||
| std::stringstream ss(s); | ||
| std::string item; | ||
| while (std::getline(ss, item, delim)) { | ||
| elems.push_back(item); | ||
| } | ||
| return elems; | ||
| } | ||
|
|
||
| std::vector<std::string> split(const std::string &s, char delim) { | ||
| std::vector<std::string> elems; | ||
| split(s, delim, elems); | ||
| return elems; | ||
| } |
| @@ -0,0 +1,98 @@ | ||
| #include "include/image2D.h" | ||
|
|
||
| /** | ||
| * @brief | ||
| * | ||
| * @param mem | ||
| * @param rowStep Width/Column * Num of channels | ||
| * @param queue | ||
| */ | ||
| Image2D::Image2D(cl_mem mem, cl_command_queue *queue, int rowStep) | ||
| :OCLBuffer(mem, queue) | ||
| { | ||
| //Needed to copy the right amount of data back to the memory | ||
| DEBUG_VALUE("Image2D::Image2D Constructor: ", mem); | ||
| this->_rowPitch = rowStep; | ||
| } | ||
|
|
||
| void Image2D::read(void *hostMem, const size_t origin[], const size_t region[], cl_bool blocking) | ||
| { | ||
| cl_int err = 0; | ||
| DEBUG_STRING("Image2D::read"); | ||
| DEBUG_VALUE("memory :",_memory); | ||
|
|
||
| DEBUG_STRING("Image Region:"); | ||
| DEBUG_VALUE("Width :" , region[0]); | ||
| DEBUG_VALUE("Height :", region[1]); | ||
| DEBUG_VALUE("Depth :" , region[2]); | ||
| DEBUG_VALUE("Row pitch is: ", _rowPitch); | ||
|
|
||
| DEBUG_STRING("Image Origin:"); | ||
| DEBUG_VALUE("Width :" , origin[0]); | ||
| DEBUG_VALUE("Height :", origin[1]); | ||
| DEBUG_VALUE("Depth :" , origin[2]); | ||
|
|
||
| if(blocking) | ||
| DEBUG_STRING("BLOCKING READ"); | ||
|
|
||
| err = clEnqueueReadImage(*_pQueue, _memory, blocking, | ||
| origin, region, _rowPitch, 0, | ||
| hostMem, 0, NULL, NULL); | ||
|
|
||
| DEBUG_CL(err); | ||
| } | ||
|
|
||
|
|
||
| /** | ||
| * @brief | ||
| * | ||
| * @param hostMem | ||
| * @param size[] region | ||
| * @param offset[] origin | ||
| * @param blocking | ||
| */ | ||
| void Image2D:: write(void *hostMem, const size_t origin[], const size_t region[], cl_bool blocking) | ||
| { | ||
| cl_int err = 0; | ||
| DEBUG_STRING("Image2D:: write"); | ||
| DEBUG_VALUE("Image2D _memory write:",_memory); | ||
| err = clEnqueueWriteImage(*_pQueue, _memory, blocking, origin, | ||
| region, _rowPitch, 0, | ||
| hostMem, 0, NULL, NULL); | ||
| DEBUG_CL(err); | ||
| } | ||
|
|
||
|
|
||
| void *Image2D::map(cl_map_flags flags, const size_t size[], const size_t offset[], size_t &_rowPitch, cl_bool blocking) | ||
| { | ||
| size_t slicePitch; | ||
| cl_int err = 0; | ||
| void* ret = clEnqueueMapImage(*_pQueue, _memory, blocking, flags, offset, size, &_rowPitch, &slicePitch, 0, NULL, NULL, &err); | ||
| DEBUG_CL(err); | ||
| return ret; | ||
| } | ||
|
|
||
|
|
||
| void Image2D::copyToBuffer(OCLBuffer &dst, const size_t size[], const size_t srcOffset[], const size_t dstOffset) | ||
| { | ||
| cl_int err = 0; | ||
| err = clEnqueueCopyImageToBuffer(*_pQueue, _memory, dst.getMem(), srcOffset, size, dstOffset, 0, NULL, NULL); | ||
| DEBUG_CL(err); | ||
| } | ||
|
|
||
|
|
||
| void *Image2D::getInfo(const cl_image_info paramName) | ||
| { | ||
| cl_int err = 0; | ||
| size_t size; | ||
| err = clGetImageInfo (_memory, paramName, 0, NULL, &size); | ||
| DEBUG_CL(err); | ||
|
|
||
| if(size > 0) { | ||
| void* info = malloc(size); | ||
| err = clGetImageInfo (_memory, paramName, size, info, &size); | ||
| DEBUG_CL(err); | ||
| return info; | ||
| } | ||
| else return NULL; | ||
| } |
| @@ -0,0 +1,65 @@ | ||
|
|
||
| /*Copyright 2017 Sateesh Pedagadi | ||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||
| you may not use this file except in compliance with the License. | ||
| You may obtain a copy of the License at | ||
| http ://www.apache.org/licenses/LICENSE-2.0 | ||
| Unless required by applicable law or agreed to in writing, software | ||
| distributed under the License is distributed on an "AS IS" BASIS, | ||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| See the License for the specific language governing permissions and | ||
| limitations under the License.*/ | ||
|
|
||
|
|
||
| #ifndef INCLUDES_H_ | ||
| #define INCLUDES_H_ | ||
|
|
||
|
|
||
| //#include <Windows.h> | ||
| #include <string.h> | ||
| #include <stdlib.h> | ||
| #include "CL/cl.h" | ||
|
|
||
| //General defines | ||
| #define MAX_STR_LEN 256 | ||
| #define HALF_STR_LEN MAX_STR_LEN/2 | ||
| #define Q_STR_LEN HALF_STR_LEN/2 | ||
|
|
||
| //OpenCL related | ||
| #define OCL_STATUS_READY 0 | ||
| #define OCL_STATUS_INITIALIZED 1 | ||
| #define OCL_STATUS_PROGRAM_ERROR 2 | ||
| #define OCL_STATUS_KERNEL_ERROR 3 | ||
| #define OCL_STATUS_MUTEX_ERROR 4 | ||
| #define OCL_STATUS_FINALIZED 5 | ||
|
|
||
| #define OCL_LOCK_SET 1 | ||
| #define OCL_LOCK_RELEASE 2 | ||
|
|
||
|
|
||
| /* | ||
| //Deep NN related | ||
| #define NN_MAX_KERNEL_COUNT 9 // 13 | ||
| #define NN_KERNEL_IDX_IM2COL3X3 0 | ||
| #define NN_KERNEL_IDX_IM2COL1X1 1 | ||
| #define NN_KERNEL_IDX_NORMARR 2 | ||
| #define NN_KERNEL_IDX_SCALEBIAS 3 | ||
| #define NN_KERNEL_IDX_ADDBIAS 4 | ||
| #define NN_KERNEL_IDX_SCALEADDBIAS 5 | ||
| #define NN_KERNEL_IDX_NORMSCALEADDBIAS 6 | ||
| #define NN_KERNEL_IDX_LEAKY_ACTIVATE 7 | ||
| #define NN_KERNEL_IDX_LINEAR_ACTIVATE 8 | ||
| #define NN_KERNEL_IDX_FLATARR 9 | ||
| #define NN_KERNEL_IDX_SOFTMAX 10 | ||
| #define NN_KERNEL_IDX_MAXPOOL 11 | ||
| #define NN_KERNEL_IDX_RESETARR 12 | ||
| */ | ||
|
|
||
|
|
||
|
|
||
| #endif /* INCLUDES_H_ */ | ||
|
|
||
|
|
| @@ -0,0 +1,111 @@ | ||
|
|
||
| /*Copyright 2017 Sateesh Pedagadi | ||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||
| you may not use this file except in compliance with the License. | ||
| You may obtain a copy of the License at | ||
| http ://www.apache.org/licenses/LICENSE-2.0 | ||
| Unless required by applicable law or agreed to in writing, software | ||
| distributed under the License is distributed on an "AS IS" BASIS, | ||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| See the License for the specific language governing permissions and | ||
| limitations under the License.*/ | ||
|
|
||
|
|
||
| #ifndef OCLWRAPPER_H_ | ||
| #define OCLWRAPPER_H_ | ||
|
|
||
| #include <chrono> | ||
| #include <algorithm> | ||
| #include <string> | ||
| #include <iostream> | ||
| #include "CL/cl.h" | ||
| #include "cl_wrapper.hpp" | ||
| #include "GlobalDefines.h" | ||
| #include "clblast.h" | ||
| //#include "clblast_half.h" | ||
|
|
||
| #define PINNED_MEM_OUTPUT | ||
|
|
||
| #define PROFILE_KERNELS 0 | ||
| #define BLOCK_KERNEL_EXEC 0 | ||
|
|
||
|
|
||
| using namespace std;; | ||
|
|
||
| string ExePath(); | ||
|
|
||
|
|
||
| typedef struct { | ||
|
|
||
| void *m_PinnedMemory; | ||
| OCLBuffer *m_OCLBuffer; | ||
|
|
||
| }StructPinnedOCLBuffer; | ||
|
|
||
| float sec(clock_t clocks); | ||
|
|
||
| enum { | ||
| NN_KERNEL_IDX_IM2COL3X3, | ||
| NN_KERNEL_IDX_IM2COL1X1, | ||
| NN_KERNEL_IDX_ADDBIAS, | ||
| NN_KERNEL_IDX_NORMSCALEADDBIAS, | ||
| NN_KERNEL_IDX_LEAKY_ACTIVATE, | ||
| NN_KERNEL_IDX_FLATARR, | ||
| NN_KERNEL_IDX_SOFTMAX, | ||
| NN_KERNEL_IDX_MAXPOOL, | ||
| NN_KERNEL_IDX_RESETARR, | ||
| NN_MAX_KERNEL_COUNT | ||
| }; | ||
|
|
||
| static const char* NN_KERNEL_NAMES[NN_MAX_KERNEL_COUNT] = { | ||
|
|
||
| "image2columarray3x3", | ||
| "image2columarray1x1", | ||
| "addbias", | ||
| "normscaleaddbias", | ||
| "leakyactivatearray", | ||
| "flattenarray", | ||
| "softmax", | ||
| "maxpool", | ||
| "resetarray" | ||
| }; | ||
|
|
||
| class OCLManager { | ||
|
|
||
| public: | ||
|
|
||
| OCLManager(); | ||
| ~OCLManager(); | ||
| int Initialize(); | ||
| int Finalize(); | ||
| void ReleaseLock(); | ||
| void SetLock(); | ||
|
|
||
| float SoftMax(OCLBuffer *input, int n, int offset, int groups, float temp, OCLBuffer *output, int base); | ||
| float OCLManager::ResetArray(int N, OCLBuffer *inArray, OCLBuffer *biasArray, int filtSize); | ||
|
|
||
| StructPinnedOCLBuffer* InitializePinnedFloatArray(size_t numItems); | ||
| void FinalizePinnedFloatArray(StructPinnedOCLBuffer*); | ||
|
|
||
| const char *GetDeviceName() { return m_DeviceName; }; | ||
|
|
||
| //private: | ||
|
|
||
| Program* m_OpenCLProgram; | ||
| void *m_RefObject; | ||
| int m_Status; | ||
| int m_LockStatus; | ||
| int m_CallerId; | ||
| //HANDLE m_LockMutex; | ||
| CLSetup m_OpenCLSetup; | ||
| KernelLauncher* m_OpenCLKernels[NN_MAX_KERNEL_COUNT]; | ||
| char m_DeviceName[256]; | ||
| }; | ||
|
|
||
|
|
||
|
|
||
| #endif /* OCLWRAPPER_H_ */ | ||
|
|
| @@ -0,0 +1,173 @@ | ||
| /* | ||
| **************************************************************************** | ||
| BSD License | ||
| Copyright (c) 2014, i-Vizon | ||
| All rights reserved. | ||
| Redistribution and use in source and binary forms, with or without | ||
| modification, are permitted provided that the following conditions are met: | ||
| 1. Redistributions of source code must retain the above copyright | ||
| notice, this list of conditions and the following disclaimer. | ||
| 2. Redistributions in binary form must reproduce the above copyright | ||
| notice, this list of conditions and the following disclaimer in the | ||
| documentation and/or other materials provided with the distribution. | ||
| 3. All advertising materials mentioning features or use of this software | ||
| must display the following acknowledgement: | ||
| This product includes software developed by the i-Vizon. | ||
| 4. Neither the name of the i-Vizon nor the | ||
| names of its contributors may be used to endorse or promote products | ||
| derived from this software without specific prior written permission. | ||
| THIS SOFTWARE IS PROVIDED BY Mageswaran.D ''AS IS'' AND ANY | ||
| EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | ||
| WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
| DISCLAIMED. IN NO EVENT SHALL Mageswaran.D BE LIABLE FOR ANY | ||
| DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | ||
| (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | ||
| LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | ||
| ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
| (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
| SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
| **************************************************************************** | ||
| */ | ||
| /* | ||
| * ============================================================================= | ||
| * | ||
| * | ||
| * FILENAME : Buffer.h | ||
| * | ||
| * DESCRIPTION : A wrapper library for OpenCL and its native counter part | ||
| * intialization. With boost thread support. | ||
| * | ||
| * AUTHOR : Mageswaran D | ||
| * | ||
| * | ||
| * CHANGE HISTORY : | ||
| * | ||
| * DATE : 17th Mar 2014 | ||
| * | ||
| * File Created. | ||
| * | ||
| * ============================================================================= | ||
| */ | ||
| #ifndef BUFFER_H | ||
| #define BUFFER_H | ||
|
|
||
| #include "iv_common.h" | ||
| #include "CL/cl.h" | ||
|
|
||
| class Image2D; | ||
| /** | ||
| * @brief | ||
| * | ||
| */ | ||
| class OCLBuffer | ||
| { | ||
| public: | ||
| /** | ||
| * @brief | ||
| * | ||
| * @param tmp | ||
| * @param queue | ||
| */ | ||
| OCLBuffer(cl_mem tmp, cl_command_queue *queue); | ||
|
|
||
| /** | ||
| * @brief | ||
| * | ||
| * @return cl_mem | ||
| */ | ||
| cl_mem getMem(); | ||
|
|
||
| /** | ||
| * @brief | ||
| * | ||
| * @param hostMem | ||
| * @param size | ||
| * @param offset | ||
| * @param blocking | ||
| */ | ||
| void read(void* hostMem, const size_t size, const size_t offset=0, const cl_bool blocking=CL_TRUE); | ||
|
|
||
| /** | ||
| * @brief | ||
| * | ||
| * @param hostMem | ||
| * @param size | ||
| * @param offset | ||
| * @param blocking | ||
| */ | ||
| void write(const void* hostMem, const size_t size, const size_t offset=0, const cl_bool blocking=CL_TRUE); | ||
|
|
||
| /** | ||
| * @brief | ||
| * | ||
| * @param dst | ||
| * @param size | ||
| * @param srcOffset | ||
| * @param dstOffset | ||
| */ | ||
| void copy(OCLBuffer& dst, const size_t size, const size_t srcOffset=0, const size_t dstOffset=0); | ||
|
|
||
|
|
||
| /** | ||
| * @brief | ||
| * | ||
| * @param flags | ||
| * @param size | ||
| * @param offset | ||
| * @param blocking | ||
| */ | ||
| void* map(const cl_map_flags flags, const size_t size, const size_t offset, const cl_bool blocking=CL_TRUE); | ||
|
|
||
| void FillBuffer(const void *hostMem, const size_t size, const size_t offset); | ||
|
|
||
| /** | ||
| * @brief | ||
| * | ||
| * @param mappedPtr | ||
| */ | ||
| void unmap(void* mappedPtr); | ||
|
|
||
| /** | ||
| * @brief | ||
| * | ||
| * @param dst | ||
| * @param size[] | ||
| * @param srcOffset | ||
| * @param dstOffset[] | ||
| */ | ||
| void copyToImage2D(Image2D& dst, const size_t size[2], const size_t srcOffset, const size_t dstOffset[2]); | ||
|
|
||
| /** | ||
| * @brief | ||
| * | ||
| * @param dst | ||
| * @param size[] | ||
| * @param srcOffset | ||
| * @param dstOffset[] | ||
| */ | ||
| void copyToImage3D(Image2D& dst, const size_t size[3], const size_t srcOffset, const size_t dstOffset[3]); | ||
|
|
||
| /** | ||
| * @brief | ||
| * | ||
| * @param paramName | ||
| */ | ||
| void* getMemInfo(const cl_mem_info paramName); | ||
|
|
||
| virtual ~OCLBuffer() | ||
| { | ||
| clReleaseMemObject(_memory); | ||
| //DEBUG_STRING("Releasing GPU Memory Buffers"); | ||
| } | ||
|
|
||
| protected: | ||
| cl_mem _memory; /**< TODO */ | ||
| cl_command_queue* _pQueue; /**< TODO */ | ||
| private: | ||
|
|
||
| }; | ||
|
|
||
| #endif // BUFFER_H |
| @@ -0,0 +1,185 @@ | ||
| #ifndef CL_WRAPPER_H | ||
| #define CL_WRAPPER_H | ||
|
|
||
| /** | ||
| * Enable CL_INFO_PRINT macro to see OpenCL Device and Kernel Info | ||
| * | ||
| */ | ||
| #include "iv_common.h" | ||
| #include "CL/cl.h" | ||
| //#include "CL/cl.hpp" | ||
|
|
||
| #include "program.h" | ||
| #include "buffer.h" | ||
| #include "image2D.h" | ||
| #include "sampler.h" | ||
|
|
||
|
|
||
| #define NUMBER_OF_PLATFORMS 5 | ||
| #define NUMBER_OF_DEVICES 5 | ||
| class CLSetup | ||
| { | ||
| public: | ||
| void init(char* deviceName); | ||
| void getPlatformID(); | ||
| void getDeviceID(char* deviceName); | ||
| void getContextnQueue(); | ||
| //Program* createProgram(std::string &kernelFilePath); | ||
| Program* createProgram(std::vector<std::string> kernelFilePath); | ||
| OCLBuffer* createBuffer(const size_t size, | ||
| const cl_mem_flags flags, | ||
| void* hostMem); | ||
| Image2D* createImage2D(const size_t width, | ||
| const size_t height, | ||
| const cl_image_format* format, | ||
| const cl_mem_flags flags = 0, | ||
| const size_t rowPitch = 0, | ||
| void* hostMem = NULL); | ||
| Sampler* createSampler(cl_bool normalizedCoords, | ||
| cl_addressing_mode addrMode, | ||
| cl_filter_mode filterMode); | ||
|
|
||
| cl_command_queue* getQueue() { | ||
|
|
||
| return &_queue; | ||
| } | ||
|
|
||
| cl_context* getContext() { | ||
|
|
||
| return &_context; | ||
| } | ||
|
|
||
| //>>>>>>>>>>>>>>>>>Get Info | ||
| int getNumberOfPlatforms() | ||
| { | ||
| return _numPlatforms; | ||
| } | ||
| void getDeviceName() | ||
| { | ||
| char* infoName; | ||
| size_t infoCLSize; | ||
| clGetPlatformInfo(_platformID,CL_PLATFORM_NAME, NULL, NULL, &infoCLSize); | ||
| infoName = (char*)malloc(sizeof(char)*infoCLSize); | ||
| clGetPlatformInfo(_platformID,CL_PLATFORM_NAME, sizeof(char) * infoCLSize, infoName, NULL ); | ||
| std::cout<<"CL_PLATFORM_NAME : "<<infoName<<std::endl; | ||
| free(infoName); | ||
| } | ||
| int getNumberOfDevices() | ||
| { | ||
| return _numDevices; | ||
| } | ||
| /// | ||
| /// \brief Compute units are equivalent of MultiProcessor | ||
| /// (which can have either 8, 32, 48 or even 192 cores), | ||
| /// and these are designed to be able to simultanesouly | ||
| /// run up to 8 work groups (blocks in CUDA) each. | ||
| /// Eg: GeForce GT 640 : 2 * 196 = 384 cores | ||
| /// Eg: Vivante GC2000 : 4 * | ||
| /// \return Maximum Compute Units available in your GPU in size_t | ||
|
|
||
| cl_uint getMaxComputeUnits() | ||
| { | ||
| return _maxComputeUnits; | ||
| } | ||
| size_t getPerferredWorkGroupSize() | ||
| { | ||
| return _perferredWrkGrpSize; ///!TODO:Need to get after from kernel init | ||
| } | ||
| /// | ||
| /// \brief getMaxWorkGroupSize | ||
| /// Returns maximum number of work items in a workgroup | ||
| /// Eg: GeForce GT 640 : 48KBytes | ||
| /// Eg: Vivante GC2000 : 1KBytes | ||
| /// \return | ||
| /// | ||
| size_t getMaxWorkGroupSize() | ||
| { | ||
| return _maxWorkGroupSize; | ||
| } | ||
| /// | ||
| /// \brief Returns the local memory size of device kernel groups | ||
| /// | ||
| /// \return | ||
| /// | ||
| cl_ulong getLocalMemSize() | ||
| { | ||
| return _localMemSize; | ||
| } | ||
| size_t getPrefferedWorkGroupSize() | ||
| { | ||
| return _preferredWorkGrpSize; | ||
| } | ||
|
|
||
| void getSupportedExtensions() | ||
| { | ||
| size_t infoCLSize; | ||
| char* infoExtensions; | ||
| clGetPlatformInfo(_platformID,CL_PLATFORM_EXTENSIONS, NULL, NULL, &infoCLSize); | ||
| infoExtensions = (char*)malloc(sizeof(char)*infoCLSize); | ||
| clGetPlatformInfo(_platformID,CL_PLATFORM_EXTENSIONS, sizeof(char) * infoCLSize, infoExtensions, NULL ); | ||
| std::cout<<"CL_PLATFORM_EXTENSIONS : "<<infoExtensions<<std::endl; | ||
| free(infoExtensions); | ||
| } | ||
|
|
||
| ~CLSetup() | ||
| { | ||
| clReleaseCommandQueue(_queue); | ||
| clReleaseContext(_context); | ||
| } | ||
|
|
||
| protected: | ||
| private: | ||
| //>>>>>>>>>>>>>>>>>Info Members | ||
| cl_int _status; | ||
| cl_long _infoValue; | ||
|
|
||
| //>>>>>>>>>>>>>>>>>Platform Members | ||
| cl_uint _numPlatforms; | ||
| cl_platform_id _platformID; | ||
| /** | ||
| * @brief _platformIDsVector | ||
| * _platformIDsVector[interestedPlatformNum] | ||
| */ | ||
| std::vector<cl_platform_id> _platformIDsVector; //!TODO: | ||
|
|
||
| //>>>>>>>>>>>>>>>>>Device Members | ||
| cl_uint _numDevices; | ||
| cl_device_id _deviceID; | ||
|
|
||
| cl_uint _maxComputeUnits; | ||
| size_t _maxWorkGroupSize; | ||
| cl_ulong _maxMemAllocSize; | ||
| cl_ulong _globalMemSize; | ||
| cl_ulong _constMemSize; | ||
| cl_ulong _localMemSize; | ||
| size_t _preferredWorkGrpSize; | ||
|
|
||
|
|
||
|
|
||
| //>>>>>>>>>>>>>>>>>Context Members | ||
| cl_context _context; | ||
|
|
||
| //>>>>>>>>>>>>>>>>>Queue Members | ||
| cl_command_queue _queue; | ||
|
|
||
| //>>>>>>>>>>>>>>>>>Program Members | ||
| cl_program _program; | ||
|
|
||
| //>>>>>>>>>>>>>>>>>Kernel Members | ||
| cl_kernel _kernel; | ||
| size_t _compileWrkGrpSize; | ||
| size_t _wrkGrpSize; | ||
| size_t _perferredWrkGrpSize; | ||
| cl_ulong _localMem; | ||
| cl_ulong _privateMem; | ||
|
|
||
| //>>>>>>>>>>>>>>>>>Image2D Members | ||
| cl_mem im2d; | ||
|
|
||
|
|
||
| }; | ||
|
|
||
| //const char * get_error_string(cl_int err); | ||
|
|
||
|
|
||
| #endif // CL_WRAPPER_H |
| @@ -0,0 +1,33 @@ | ||
|
|
||
| #ifndef __CL_UTILS__H__ | ||
| #define __CL_UTILS__H__ | ||
|
|
||
| #include <CL/opencl.h> | ||
|
|
||
| char* utils_cl_enum_to_string (cl_int value); | ||
|
|
||
| char* utils_get_ocl_error (cl_int err_code); | ||
|
|
||
| int utils_get_platform_and_device (cl_device_type dev_type, | ||
| cl_platform_id *platform, | ||
| cl_device_id *device_id, | ||
| int just_print); | ||
|
|
||
| void utils_print_device_info (cl_device_id dev_id); | ||
|
|
||
| void utils_print_platform_info (cl_platform_id dev_id); | ||
|
|
||
| char* utils_read_file (const char* filename); | ||
|
|
||
| cl_ulong utils_get_event_time (cl_event event, | ||
| cl_profiling_info param); | ||
|
|
||
| cl_ulong utils_get_event_execution_time (cl_event event); | ||
|
|
||
| #define CHECK_OCL_ERROR(op_name, err_code) \ | ||
| if (err_code != CL_SUCCESS) \ | ||
| {\ | ||
| printf ("%s:%d: " #op_name " failed! %s\n", __FILE__, __LINE__, utils_get_ocl_error (err_code));\ | ||
| } | ||
|
|
||
| #endif |
| @@ -0,0 +1,24 @@ | ||
|
|
||
| #ifndef HELPERS__H | ||
| #define HELPERS__H | ||
|
|
||
| #include <string> | ||
| #include <vector> | ||
| #include <sstream> | ||
|
|
||
| std::vector<std::string> split(const std::string &s, char delim); | ||
|
|
||
| template <class T> | ||
| std::string vector_join( const std::vector<T>& v, const std::string& token ) { | ||
| std::stringstream result; | ||
| for (typename std::vector<T>::const_iterator i = v.begin(); i != v.end(); i++) { | ||
| if ( i != v.begin() ) | ||
| result << token; | ||
| result << *i; | ||
| } | ||
| return result.str(); | ||
| } | ||
|
|
||
| std::string randUuid(); | ||
|
|
||
| #endif // HELPERS__H |
| @@ -0,0 +1,83 @@ | ||
|
|
||
| #ifndef IMAGE_2D_H | ||
| #define IMAGE_2D_H | ||
|
|
||
| #include "CL/cl.h" | ||
| #include "buffer.h" | ||
|
|
||
| class Image2D : public OCLBuffer | ||
| { | ||
| public: | ||
| /** | ||
| * @brief | ||
| * | ||
| * @param mem | ||
| * @param queue | ||
| * @param rowPitch | ||
| */ | ||
| Image2D(cl_mem mem, cl_command_queue* queue, int rowPitch = 0); | ||
| /** | ||
| * @brief | ||
| * | ||
| * @param hostMem | ||
| * @param size[] | ||
| * @param offset[] | ||
| * @param blocking | ||
| */ | ||
| void read(void* hostMem, const size_t size[2], | ||
| const size_t offset[2], cl_bool blocking = CL_TRUE); | ||
| /** | ||
| * @brief | ||
| * | ||
| * @param hostMem | ||
| * @param size[] | ||
| * @param offset[] | ||
| * @param blocking | ||
| */ | ||
| void write(void* hostMem, const size_t size[2], | ||
| const size_t offset[2], cl_bool blocking = CL_TRUE); | ||
| /** | ||
| * @brief | ||
| * | ||
| * @param flags | ||
| * @param size[] | ||
| * @param offset[] | ||
| * @param rowPitch | ||
| * @param blocking | ||
| */ | ||
| void* map(cl_map_flags flags, const size_t size[2], | ||
| const size_t offset[2], size_t& rowPitch, cl_bool blocking = CL_TRUE); | ||
| /** | ||
| * @brief | ||
| * | ||
| * @param dst | ||
| * @param size[] | ||
| * @param srcOffset[] | ||
| * @param dstOffset | ||
| */ | ||
| void copyToBuffer(OCLBuffer& dst, const size_t size[2], | ||
| const size_t srcOffset[2], const size_t dstOffset = 0); | ||
| /** | ||
| * @brief | ||
| * | ||
| * @param paramName | ||
| */ | ||
| void* getInfo(const cl_image_info paramName); | ||
| /** | ||
| * @brief | ||
| * | ||
| */ | ||
| ~Image2D() | ||
| { | ||
|
|
||
| } | ||
|
|
||
| protected: | ||
| size_t _rowPitch; | ||
|
|
||
| private: | ||
| }; | ||
|
|
||
|
|
||
|
|
||
| #endif //IMAGE_2D_H |
| @@ -0,0 +1,160 @@ | ||
|
|
||
|
|
||
| #ifndef COMMON_H | ||
| #define COMMON_H | ||
|
|
||
| #include <stdio.h> | ||
| #include <stdlib.h> | ||
| #include <string.h> | ||
| #include <math.h> | ||
| #include <iostream> | ||
| #include <fstream> | ||
| #include <iostream> | ||
| #include <iterator> | ||
| #include <map> | ||
| #include <iostream> | ||
| #include <vector> | ||
|
|
||
| #include "CL/cl.h" | ||
|
|
||
| ///!TODO: Use bits as enum value | ||
| typedef enum { | ||
| IV_CHAR_FILE_OPEN_FAIL = 0, | ||
| IV_CHAR_FILE_STATUS_FAIL | ||
| }IV_ERRORS_INFO; | ||
|
|
||
| const char* getCLErrorString(int err); | ||
| const char* getCustomErrorString(int err, IV_ERRORS_INFO info); | ||
| std::string getImgType(int imgTypeInt); | ||
|
|
||
|
|
||
| /*#define DEBUG_CL(err) \ | ||
| if(err< 0) { \ | ||
| std::cout<<"FILE: "<<__FILE__<<std::cout<<" Line Number: "<<__LINE__\ | ||
| <<" Function Name : "<<__func__<<"Error Name:" \ | ||
| <<getCLErrorString(err) \ | ||
| <<std::endl; \ | ||
| exit(err); }*/ | ||
|
|
||
| #define DEBUG_IV(err, info) \ | ||
| if(err< 0) { \ | ||
| std::cout<<"Line Number: "<<__LINE__<<" Function Name : "<<__func__<<"Error Name:" \ | ||
| <<getCustomErrorString(err, info) \ | ||
| <<std::endl; \ | ||
| exit(err); } | ||
|
|
||
| //#define DEBUG_VALUE(dbgMsg, value) \ | ||
| // std::cout<<"=====>"<<" " <<dbgMsg<<"......."<<value<<std::endl; | ||
|
|
||
| //#define DEBUG_STRING(dbgMsg) \ | ||
| //std::cout<<">>>>>>"<<dbgMsg<<std::endl; | ||
|
|
||
| #define ERROR_PRINT_VALUE(dbgMsg, value) \ | ||
| { \ | ||
| std::cout<<"\n=====> Line Number: "<<__LINE__<<" Function Name :"<<__func__\ | ||
| <<"\n "<<dbgMsg<<" "<<value<<std::endl; \ | ||
| exit(0); \ | ||
| }; | ||
|
|
||
| #define ERROR_PRINT_STRING(dbgMsg) \ | ||
| { \ | ||
| std::cout<<"\n=====> Line Number: "<<__LINE__<<" Function Name: "<<__func__\ | ||
| <<"\n "<<dbgMsg<<std::endl; \ | ||
| exit(0); \ | ||
| }; | ||
|
|
||
| #ifdef IVIZON_DEBUG | ||
| #define F_LOG LogBlock _l(__func__) | ||
| struct LogBlock { | ||
| const char *mLine; | ||
| LogBlock(const char *line) : mLine(line) { | ||
| std::cout<<mLine <<" ----->#### Enter \n"; | ||
| } | ||
| ~LogBlock() { | ||
| std::cout<<mLine <<" <-----#### Leave \n"; | ||
| } | ||
| }; | ||
| #else | ||
| #define F_LOG {} | ||
| #define DEBUG_CL(err) {} | ||
| #define DEBUG_STRING(dbgMsg) {} | ||
| #define DEBUG_VALUE(dbgMsg, value) {} | ||
| #endif | ||
|
|
||
|
|
||
|
|
||
| // CV Defines | ||
| #define CV_8U 0 | ||
| #define CV_8S 1 | ||
| #define CV_16U 2 | ||
| #define CV_16S 3 | ||
| #define CV_32S 4 | ||
| #define CV_32F 5 | ||
| #define CV_64F 6 | ||
|
|
||
| #define CV_CN_MAX 512 | ||
|
|
||
| #define CV_MAT_DEPTH_MASK (CV_DEPTH_MAX - 1) | ||
|
|
||
| #define CV_CN_SHIFT 3 | ||
| #define CV_MAT_DEPTH(flags) ((flags) & CV_MAT_DEPTH_MASK) | ||
| #define CV_DEPTH_MAX (1 << CV_CN_SHIFT) | ||
|
|
||
| #define CV_MAKETYPE(depth,cn) (CV_MAT_DEPTH(depth) + (((cn)-1) << CV_CN_SHIFT)) | ||
| #define CV_MAKE_TYPE CV_MAKETYPE | ||
|
|
||
| #define CV_8UC1 CV_MAKETYPE(CV_8U,1) | ||
| #define CV_8UC2 CV_MAKETYPE(CV_8U,2) | ||
| #define CV_8UC3 CV_MAKETYPE(CV_8U,3) | ||
| #define CV_8UC4 CV_MAKETYPE(CV_8U,4) | ||
| #define CV_8UC(n) CV_MAKETYPE(CV_8U,(n)) | ||
|
|
||
| #define CV_8SC1 CV_MAKETYPE(CV_8S,1) | ||
| #define CV_8SC2 CV_MAKETYPE(CV_8S,2) | ||
| #define CV_8SC3 CV_MAKETYPE(CV_8S,3) | ||
| #define CV_8SC4 CV_MAKETYPE(CV_8S,4) | ||
| #define CV_8SC(n) CV_MAKETYPE(CV_8S,(n)) | ||
|
|
||
| #define CV_16UC1 CV_MAKETYPE(CV_16U,1) | ||
| #define CV_16UC2 CV_MAKETYPE(CV_16U,2) | ||
| #define CV_16UC3 CV_MAKETYPE(CV_16U,3) | ||
| #define CV_16UC4 CV_MAKETYPE(CV_16U,4) | ||
| #define CV_16UC(n) CV_MAKETYPE(CV_16U,(n)) | ||
|
|
||
| #define CV_16SC1 CV_MAKETYPE(CV_16S,1) | ||
| #define CV_16SC2 CV_MAKETYPE(CV_16S,2) | ||
| #define CV_16SC3 CV_MAKETYPE(CV_16S,3) | ||
| #define CV_16SC4 CV_MAKETYPE(CV_16S,4) | ||
| #define CV_16SC(n) CV_MAKETYPE(CV_16S,(n)) | ||
|
|
||
| #define CV_32SC1 CV_MAKETYPE(CV_32S,1) | ||
| #define CV_32SC2 CV_MAKETYPE(CV_32S,2) | ||
| #define CV_32SC3 CV_MAKETYPE(CV_32S,3) | ||
| #define CV_32SC4 CV_MAKETYPE(CV_32S,4) | ||
| #define CV_32SC(n) CV_MAKETYPE(CV_32S,(n)) | ||
|
|
||
| #define CV_32FC1 CV_MAKETYPE(CV_32F,1) | ||
| #define CV_32FC2 CV_MAKETYPE(CV_32F,2) | ||
| #define CV_32FC3 CV_MAKETYPE(CV_32F,3) | ||
| #define CV_32FC4 CV_MAKETYPE(CV_32F,4) | ||
| #define CV_32FC(n) CV_MAKETYPE(CV_32F,(n)) | ||
|
|
||
| #define CV_64FC1 CV_MAKETYPE(CV_64F,1) | ||
| #define CV_64FC2 CV_MAKETYPE(CV_64F,2) | ||
| #define CV_64FC3 CV_MAKETYPE(CV_64F,3) | ||
| #define CV_64FC4 CV_MAKETYPE(CV_64F,4) | ||
| #define CV_64FC(n) CV_MAKETYPE(CV_64F,(n)) | ||
|
|
||
|
|
||
| /// IV Data Types | ||
| //This is done to have more control on memory and range on numbers | ||
|
|
||
| typedef unsigned char IV_8U; | ||
| typedef char IV_8S; | ||
| typedef unsigned IV_16U; | ||
| typedef signed IV_16S; | ||
| typedef int IV_32S; | ||
| typedef unsigned int IV_32U; | ||
| typedef float IV_32F; | ||
| typedef double IV_64F; | ||
| #endif |
| @@ -0,0 +1,115 @@ | ||
|
|
||
|
|
||
| #ifndef KERNELLAUNCHER_H | ||
| #define KERNELLAUNCHER_H | ||
|
|
||
| #include "iv_common.h" | ||
|
|
||
|
|
||
|
|
||
| class KernelLauncher | ||
| { | ||
| public: | ||
| KernelLauncher(cl_device_id _device_id, cl_kernel* kernel, cl_command_queue* queue, std::string kernelNAme); | ||
|
|
||
| ///For a continuous aggignment using an object | ||
| KernelLauncher& global(const int g); | ||
| KernelLauncher& global(const int gx, const int gy); | ||
| KernelLauncher& global(const int gx, const int gy, const int gz); | ||
| KernelLauncher& local(const int l); | ||
| KernelLauncher& local(const int lx, const int ly); | ||
| KernelLauncher& local(const int lx, const int ly, const int lz); | ||
| ///For a continuous aggignment using an pointer object | ||
| KernelLauncher* pGlobal(const int g); | ||
| KernelLauncher* pGlobal(const int gx, const int gy); | ||
| KernelLauncher* pGlobal(const int gx, const int gy, const int gz); | ||
| KernelLauncher* pLocal(const int l); | ||
| KernelLauncher* pLocal(const int lx, const int ly); | ||
| KernelLauncher* pLocal(const int lx, const int ly, const int lz); | ||
|
|
||
| int countArgs(); | ||
|
|
||
| ///For a continuous aggignment using an object | ||
| template<class T> | ||
| KernelLauncher& arg(const int index, T x) { | ||
| if (index >= _numArgs || index < 0) { | ||
| std::cout << "Error: argument index out of range" << std::endl; | ||
| exit(-1);///!TODO: Custom exit code | ||
| } | ||
| cl_int status = clSetKernelArg(*_pKernel, index, sizeof(x), &x); | ||
| DEBUG_CL(status); | ||
| _argListData[index] = true; | ||
| return *this; | ||
| } | ||
| ///For a continuous aggignment using an pointer object | ||
| template<class T> | ||
| KernelLauncher& arg(T x) { | ||
| int nArgs = countArgs(); | ||
| if (nArgs >= _numArgs) { | ||
| std::cout << "Error trying to enqueue too much arguments" << std::endl; | ||
| std::cout << "Expected " << _numArgs << ", got " << nArgs << std::endl; | ||
| exit(-1);///!TODO: Custom exit code | ||
| } | ||
| for(int i=0; i<_numArgs; i++) | ||
| if(!_argListData[i]) | ||
| return arg(i, x); | ||
| return *this; | ||
| } | ||
|
|
||
| ///For a continuous aggignment using an object | ||
| template<class T> | ||
| KernelLauncher* pArg(const int index, T &x) { | ||
| if (index >= _numArgs || index < 0) { | ||
| std::cout << "Error: argument index out of range" << std::endl; | ||
| exit(-1);///!TODO: Custom exit code | ||
| } | ||
| cl_int status = clSetKernelArg(*_pKernel, index, sizeof(x), &x); | ||
| DEBUG_VALUE("Setting Kernel Argument: ", index); | ||
| DEBUG_VALUE("Value/Address: ", x); | ||
| DEBUG_VALUE("Size : ", sizeof(x)); | ||
| DEBUG_CL(status); | ||
| _argListData[index] = true; | ||
| return this; | ||
| } | ||
| ///For a continuous aggignment using an pointer object | ||
| template<class T> | ||
| KernelLauncher* pArg(T &x) { | ||
| int nArgs = countArgs(); | ||
| if (nArgs > _numArgs) { | ||
| std::cout << "Error trying to enqueue too much arguments" << std::endl; | ||
| std::cout << "Expected " << _numArgs << ", got " << nArgs << std::endl; | ||
| exit(-1);///!TODO: Custom exit code | ||
| } | ||
| for(int i=0; i<_numArgs; i++) | ||
| if(!_argListData[i]) | ||
| return pArg(i, x); | ||
| return this; | ||
| } | ||
| float run(bool profile, bool block); | ||
|
|
||
| ~KernelLauncher() | ||
| { | ||
| clReleaseKernel(*_pKernel); | ||
| } | ||
|
|
||
| size_t GetOptimalLWGSize() { | ||
|
|
||
| return _optimal_local_workgroup_size; | ||
| } | ||
|
|
||
| protected: | ||
| private: | ||
| cl_kernel* _pKernel; | ||
| cl_command_queue* _pQueue; | ||
| cl_int _numArgs; | ||
| size_t _globalWorkSize[3]; | ||
| size_t _localWorkSize[3]; | ||
| cl_int _dimensions; | ||
| std::string _kernel_name; | ||
| cl_bool* _argListData; | ||
| cl_device_id _device_id; | ||
| size_t _optimal_local_workgroup_size; | ||
| }; | ||
|
|
||
|
|
||
| #endif // KERNELLAUNCHER_H |
| @@ -0,0 +1,188 @@ | ||
| #ifndef OCL_UTILS_H | ||
| #define OCL_UTILS_H | ||
|
|
||
| // ********************************************************************* | ||
| // Utilities specific to OpenCL samples in NVIDIA GPU Computing SDK | ||
| // ********************************************************************* | ||
|
|
||
| // Common headers: Cross-API utililties and OpenCL header | ||
| #include "shrUtils.h" | ||
|
|
||
| // All OpenCL headers | ||
| #if defined (__APPLE__) || defined(MACOSX) | ||
| #include <OpenCL/opencl.h> | ||
| #else | ||
| #include <CL/opencl.h> | ||
| #endif | ||
|
|
||
| // Includes | ||
| #include <stdio.h> | ||
| #include <string.h> | ||
| #include <stdlib.h> | ||
|
|
||
| // For systems with CL_EXT that are not updated with these extensions, we copied these | ||
| // extensions from <CL/cl_ext.h> | ||
| #ifndef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV | ||
| /* cl_nv_device_attribute_query extension - no extension #define since it has no functions */ | ||
| #define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000 | ||
| #define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001 | ||
| #define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002 | ||
| #define CL_DEVICE_WARP_SIZE_NV 0x4003 | ||
| #define CL_DEVICE_GPU_OVERLAP_NV 0x4004 | ||
| #define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005 | ||
| #define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006 | ||
| #endif | ||
|
|
||
| // reminders for build output window and log | ||
| #ifdef _WIN32 | ||
| #pragma message ("Note: including shrUtils.h") | ||
| #pragma message ("Note: including opencl.h") | ||
| #endif | ||
|
|
||
| // SDK Revision # | ||
| #define OCL_SDKREVISION "7027912" | ||
|
|
||
| // Error and Exit Handling Macros... | ||
| // ********************************************************************* | ||
| // Full error handling macro with Cleanup() callback (if supplied)... | ||
| // (Companion Inline Function lower on page) | ||
| #define oclCheckErrorEX(a, b, c) __oclCheckErrorEX(a, b, c, __FILE__ , __LINE__) | ||
|
|
||
| // Short version without Cleanup() callback pointer | ||
| // Both Input (a) and Reference (b) are specified as args | ||
| #define oclCheckError(a, b) oclCheckErrorEX(a, b, 0) | ||
|
|
||
| ////////////////////////////////////////////////////////////////////////////// | ||
| //! Gets the platform ID for NVIDIA if available, otherwise default to platform 0 | ||
| //! | ||
| //! @return the id | ||
| //! @param clSelectedPlatformID OpenCL platform ID | ||
| ////////////////////////////////////////////////////////////////////////////// | ||
| extern "C" cl_int oclGetPlatformID(cl_platform_id* clSelectedPlatformID); | ||
|
|
||
| ////////////////////////////////////////////////////////////////////////////// | ||
| //! Print info about the device | ||
| //! | ||
| //! @param iLogMode enum LOGBOTH, LOGCONSOLE, LOGFILE | ||
| //! @param device OpenCL id of the device | ||
| ////////////////////////////////////////////////////////////////////////////// | ||
| extern "C" void oclPrintDevInfo(int iLogMode, cl_device_id device); | ||
|
|
||
| ////////////////////////////////////////////////////////////////////////////// | ||
| //! Get and return device capability | ||
| //! | ||
| //! @return the 2 digit integer representation of device Cap (major minor). return -1 if NA | ||
| //! @param device OpenCL id of the device | ||
| ////////////////////////////////////////////////////////////////////////////// | ||
| extern "C" int oclGetDevCap(cl_device_id device); | ||
|
|
||
| ////////////////////////////////////////////////////////////////////////////// | ||
| //! Print the device name | ||
| //! | ||
| //! @param iLogMode enum LOGBOTH, LOGCONSOLE, LOGFILE | ||
| //! @param device OpenCL id of the device | ||
| ////////////////////////////////////////////////////////////////////////////// | ||
| extern "C" void oclPrintDevName(int iLogMode, cl_device_id device); | ||
|
|
||
| ////////////////////////////////////////////////////////////////////////////// | ||
| //! Gets the id of the first device from the context | ||
| //! | ||
| //! @return the id | ||
| //! @param cxGPUContext OpenCL context | ||
| ////////////////////////////////////////////////////////////////////////////// | ||
| extern "C" cl_device_id oclGetFirstDev(cl_context cxGPUContext); | ||
|
|
||
| ////////////////////////////////////////////////////////////////////////////// | ||
| //! Gets the id of the nth device from the context | ||
| //! | ||
| //! @return the id or -1 when out of range | ||
| //! @param cxGPUContext OpenCL context | ||
| //! @param device_idx index of the device of interest | ||
| ////////////////////////////////////////////////////////////////////////////// | ||
| extern "C" cl_device_id oclGetDev(cl_context cxGPUContext, unsigned int device_idx); | ||
|
|
||
| ////////////////////////////////////////////////////////////////////////////// | ||
| //! Gets the id of device with maximal FLOPS from the context | ||
| //! | ||
| //! @return the id | ||
| //! @param cxGPUContext OpenCL context | ||
| ////////////////////////////////////////////////////////////////////////////// | ||
| extern "C" cl_device_id oclGetMaxFlopsDev(cl_context cxGPUContext); | ||
|
|
||
| ////////////////////////////////////////////////////////////////////////////// | ||
| //! Loads a Program file and prepends the cPreamble to the code. | ||
| //! | ||
| //! @return the source string if succeeded, 0 otherwise | ||
| //! @param cFilename program filename | ||
| //! @param cPreamble code that is prepended to the loaded file, typically a set of #defines or a header | ||
| //! @param szFinalLength returned length of the code string | ||
| ////////////////////////////////////////////////////////////////////////////// | ||
| extern "C" char* oclLoadProgSource(const char* cFilename, const char* cPreamble, size_t* szFinalLength); | ||
|
|
||
| ////////////////////////////////////////////////////////////////////////////// | ||
| //! Get the binary (PTX) of the program associated with the device | ||
| //! | ||
| //! @param cpProgram OpenCL program | ||
| //! @param cdDevice device of interest | ||
| //! @param binary returned code | ||
| //! @param length length of returned code | ||
| ////////////////////////////////////////////////////////////////////////////// | ||
| extern "C" void oclGetProgBinary( cl_program cpProgram, cl_device_id cdDevice, char** binary, size_t* length); | ||
|
|
||
| ////////////////////////////////////////////////////////////////////////////// | ||
| //! Get and log the binary (PTX) from the OpenCL compiler for the requested program & device | ||
| //! | ||
| //! @param cpProgram OpenCL program | ||
| //! @param cdDevice device of interest | ||
| //! @param const char* cPtxFileName optional PTX file name | ||
| ////////////////////////////////////////////////////////////////////////////// | ||
| extern "C" void oclLogPtx(cl_program cpProgram, cl_device_id cdDevice, const char* cPtxFileName); | ||
|
|
||
| ////////////////////////////////////////////////////////////////////////////// | ||
| //! Get and log the Build Log from the OpenCL compiler for the requested program & device | ||
| //! | ||
| //! @param cpProgram OpenCL program | ||
| //! @param cdDevice device of interest | ||
| ////////////////////////////////////////////////////////////////////////////// | ||
| extern "C" void oclLogBuildInfo(cl_program cpProgram, cl_device_id cdDevice); | ||
|
|
||
| // Helper function for De-allocating cl objects | ||
| // ********************************************************************* | ||
| extern "C" void oclDeleteMemObjs(cl_mem* cmMemObjs, int iNumObjs); | ||
|
|
||
| // Helper function to get OpenCL error string from constant | ||
| // ********************************************************************* | ||
| extern "C" const char* oclErrorString(cl_int error); | ||
|
|
||
| // Helper function to get OpenCL image format string (channel order and type) from constant | ||
| // ********************************************************************* | ||
| extern "C" const char* oclImageFormatString(cl_uint uiImageFormat); | ||
|
|
||
| // companion inline function for error checking and exit on error WITH Cleanup Callback (if supplied) | ||
| // ********************************************************************* | ||
| inline void __oclCheckErrorEX(cl_int iSample, cl_int iReference, void (*pCleanup)(int), const char* cFile, const int iLine) | ||
| { | ||
| // An error condition is defined by the sample/test value not equal to the reference | ||
| if (iReference != iSample) | ||
| { | ||
| // If the sample/test value isn't equal to the ref, it's an error by defnition, so override 0 sample/test value | ||
| iSample = (iSample == 0) ? -9999 : iSample; | ||
|
|
||
| // Log the error info | ||
| shrLog("\n !!! Error # %i (%s) at line %i , in file %s !!!\n\n", iSample, oclErrorString(iSample), iLine, cFile); | ||
|
|
||
| // Cleanup and exit, or just exit if no cleanup function pointer provided. Use iSample (error code in this case) as process exit code. | ||
| if (pCleanup != NULL) | ||
| { | ||
| pCleanup(iSample); | ||
| } | ||
| else | ||
| { | ||
| shrLogEx(LOGBOTH | CLOSELOG, 0, "Exiting...\n"); | ||
| exit(iSample); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| #endif | ||
|
|
| @@ -0,0 +1,40 @@ | ||
| #include <CL/cl.h> | ||
| #include "iv_common.h" | ||
| #include "kernel_launcher.h" | ||
|
|
||
|
|
||
|
|
||
| #ifndef PROGRAM_H | ||
| #define PROGRAM_H | ||
|
|
||
| class Program | ||
| { | ||
| public: | ||
| //Program(std::string &kernelFilePath, cl_context* context, cl_command_queue* queue, cl_device_id* device); | ||
| Program(std::vector<std::string> &kernelFilePath, cl_context* context, cl_command_queue* queue, cl_device_id* device); | ||
| void createProgram(std::string filePath); | ||
| void buildProgram(); | ||
| KernelLauncher* createKernelLauncher(std::string kernelName); | ||
| ~Program() | ||
| { | ||
| clReleaseProgram(_program); | ||
| } | ||
|
|
||
| protected: | ||
|
|
||
| private: | ||
| cl_program _program; | ||
| cl_kernel _kernel; | ||
| cl_int _numKernels; | ||
| std::map<std::string, cl_kernel> _kernels; | ||
| std::string _filesPath; | ||
|
|
||
| cl_context* _pContext; | ||
| cl_command_queue* _pQueue; | ||
| cl_device_id* _pDeviceID; | ||
|
|
||
| cl_int _status; | ||
| cl_bool _buildState; | ||
| }; | ||
|
|
||
| #endif // PROGRAM_H |
| @@ -0,0 +1,19 @@ | ||
| #ifndef SAMPLER_H | ||
| #define SAMPLER_H | ||
| #include <CL/cl.h> | ||
|
|
||
| class Sampler | ||
| { | ||
| public: | ||
| Sampler(cl_context* /* context */, | ||
| cl_bool /* normalized_coords */, | ||
| cl_addressing_mode /* addressing_mode */, | ||
| cl_filter_mode /* filter_mode */); | ||
| cl_sampler& getSampler(); | ||
| protected: | ||
| private: | ||
| cl_context* _pContext; | ||
| cl_sampler _sampler; | ||
| }; | ||
|
|
||
| #endif // SAMPLER_H |
| @@ -0,0 +1,104 @@ | ||
| #include "include/iv_common.h" | ||
|
|
||
| const char * getCLErrorString(int err) | ||
| { | ||
| // /F_LOG; | ||
| switch(err) | ||
| { | ||
| case 0: return "CL_SUCCESS"; | ||
| case -1: return "CL_DEVICE_NOT_FOUND"; | ||
| case -2: return "CL_DEVICE_NOT_AVAILABLE"; | ||
| case -3: return "CL_COMPILER_NOT_AVAILABLE"; | ||
| case -4: return "CL_MEM_OBJECT_ALLOCATION_FAILURE"; | ||
| case -5: return "CL_OUT_OF_RESOURCES"; | ||
| case -6: return "CL_OUT_OF_HOST_MEMORY"; | ||
| case -7: return "CL_PROFILING_INFO_NOT_AVAILABLE"; | ||
| case -8: return "CL_MEM_COPY_OVERLAP"; | ||
| case -9: return "CL_IMAGE_FORMAT_MISMATCH"; | ||
| case -10: return "CL_IMAGE_FORMAT_NOT_SUPPORTED"; | ||
| case -11: return "CL_BUILD_Program_FAILURE"; | ||
| case -12: return "CL_MAP_FAILURE"; | ||
|
|
||
| case -30: return "CL_INVALID_VALUE"; | ||
| case -31: return "CL_INVALID_DEVICE_TYPE"; | ||
| case -32: return "CL_INVALID_PLATFORM"; | ||
| case -33: return "CL_INVALID_DEVICE"; | ||
| case -34: return "CL_INVALID_mContext"; | ||
| case -35: return "CL_INVALID_mQueue_PROPERTIES"; | ||
| case -36: return "CL_INVALID_COMMAND_mQueue"; | ||
| case -37: return "CL_INVALID_HOST_PTR"; | ||
| case -38: return "CL_INVALID_MEM_OBJECT"; | ||
| case -39: return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR"; | ||
| case -40: return "CL_INVALID_IMAGE_SIZE"; | ||
| case -41: return "CL_INVALID_SAMPLER"; | ||
| case -42: return "CL_INVALID_BINARY"; | ||
| case -43: return "CL_INVALID_BUILD_OPTIONS"; | ||
| case -44: return "CL_INVALID_Program"; | ||
| case -45: return "CL_INVALID_Program_EXECUTABLE"; | ||
| case -46: return "CL_INVALID_KERNEL_NAME"; | ||
| case -47: return "CL_INVALID_KERNEL_DEFINITION"; | ||
| case -48: return "CL_INVALID_KERNEL"; | ||
| case -49: return "CL_INVALID_ARG_mINDEX"; | ||
| case -50: return "CL_INVALID_ARG_VALUE"; | ||
| case -51: return "CL_INVALID_ARG_SIZE"; | ||
| case -52: return "CL_INVALID_KERNEL_ARGS"; | ||
| case -53: return "CL_INVALID_WORK_DIMENSION"; | ||
| case -54: return "CL_INVALID_WORK_GROUP_SIZE"; | ||
| case -55: return "CL_INVALID_WORK_mITEM_SIZE"; | ||
| case -56: return "CL_INVALID_GLOBAL_OFFSET"; | ||
| case -57: return "CL_INVALID_EVENT_WAIT_LIST"; | ||
| case -58: return "CL_INVALID_EVENT"; | ||
| case -59: return "CL_INVALID_OPERATION"; | ||
| case -60: return "CL_INVALID_GL_OBJECT"; | ||
| case -61: return "CL_INVALID_mBuffer_SIZE"; | ||
| case -62: return "CL_INVALID_MIP_LEVEL"; | ||
| case -63: return "CL_INVALID_GLOBAL_WORK_SIZE"; | ||
| default: return "Unknown OpenCL error"; | ||
| } | ||
| } | ||
|
|
||
|
|
||
| const char* getCustomErrorString(int err, IV_ERRORS_INFO info) | ||
| { | ||
| switch(info) | ||
| { | ||
| case IV_CHAR_FILE_OPEN_FAIL: return "IV_CHAR_FILE_OPEN_FAIL"; | ||
| case IV_CHAR_FILE_STATUS_FAIL: return "IV_CHAR_FILE_STATUS_FAIL"; | ||
| default: | ||
| return "IV_UNKNOWN_ERROR"; | ||
| } | ||
| } | ||
|
|
||
|
|
||
| /** | ||
| * @brief CLRuntime::getImgType // take number | ||
| * image type number (from cv::Mat.type()), get OpenCV's enum string. | ||
| * @param imgTypeInt | ||
| * @return | ||
| */ | ||
| std::string getImgType(int imgTypeInt) | ||
| { | ||
| int numImgTypes = 35; // 7 base types, with five channel options each (none or C1, ..., C4) | ||
|
|
||
| int enum_Ints[] = {CV_8U, CV_8UC1, CV_8UC2, CV_8UC3, CV_8UC4, | ||
| CV_8S, CV_8SC1, CV_8SC2, CV_8SC3, CV_8SC4, | ||
| CV_16U, CV_16UC1, CV_16UC2, CV_16UC3, CV_16UC4, | ||
| CV_16S, CV_16SC1, CV_16SC2, CV_16SC3, CV_16SC4, | ||
| CV_32S, CV_32SC1, CV_32SC2, CV_32SC3, CV_32SC4, | ||
| CV_32F, CV_32FC1, CV_32FC2, CV_32FC3, CV_32FC4, | ||
| CV_64F, CV_64FC1, CV_64FC2, CV_64FC3, CV_64FC4}; | ||
|
|
||
| std::string enum_strings[] = {"CV_8U", "CV_8UC1", "CV_8UC2", "CV_8UC3", "CV_8UC4", | ||
| "CV_8S", "CV_8SC1", "CV_8SC2", "CV_8SC3", "CV_8SC4", | ||
| "CV_16U", "CV_16UC1", "CV_16UC2", "CV_16UC3", "CV_16UC4", | ||
| "CV_16S", "CV_16SC1", "CV_16SC2", "CV_16SC3", "CV_16SC4", | ||
| "CV_32S", "CV_32SC1", "CV_32SC2", "CV_32SC3", "CV_32SC4", | ||
| "CV_32F", "CV_32FC1", "CV_32FC2", "CV_32FC3", "CV_32FC4", | ||
| "CV_64F", "CV_64FC1", "CV_64FC2", "CV_64FC3", "CV_64FC4"}; | ||
|
|
||
| for(int i=0; i<numImgTypes; i++) | ||
| { | ||
| if(imgTypeInt == enum_Ints[i]) return enum_strings[i]; | ||
| } | ||
| return "unknown image type"; | ||
| } |
| @@ -0,0 +1,206 @@ | ||
| #include "include/kernel_launcher.h" | ||
|
|
||
| KernelLauncher::KernelLauncher(cl_device_id device_id, cl_kernel *kernel, cl_command_queue *queue, std::string kernelName) | ||
| { | ||
| cl_int status; | ||
| this->_pKernel = kernel; | ||
| this->_pQueue = queue; | ||
| this->_dimensions = -1; | ||
| _device_id = device_id; | ||
| _globalWorkSize[0] = _globalWorkSize[1] = _globalWorkSize[2] = | ||
| _localWorkSize[0] = _localWorkSize[1] = _localWorkSize[2] = NULL; | ||
|
|
||
| //Finding number of arguments in given kernel and making | ||
| //an bool array to track its data content | ||
| status = clGetKernelInfo(*_pKernel, CL_KERNEL_NUM_ARGS, sizeof(cl_int), &_numArgs, NULL); | ||
| DEBUG_CL(status); | ||
| printf("Number of kernel Arguments : %d %s \n",_numArgs, kernelName.c_str()); | ||
| this->_argListData = (cl_bool*) malloc(_numArgs*sizeof(cl_bool));//new cl_bool[numArgs]; | ||
| for(int i=0; i<_numArgs; i++) | ||
| this->_argListData[i] = false; | ||
|
|
||
| cl_int err = clGetKernelWorkGroupInfo(*_pKernel, _device_id, | ||
| CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &_optimal_local_workgroup_size, NULL); | ||
| //err = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL); | ||
|
|
||
| size_t compileWorkGroupSize[3]; | ||
| err = clGetKernelWorkGroupInfo(*_pKernel, _device_id, | ||
| CL_KERNEL_COMPILE_WORK_GROUP_SIZE, | ||
| sizeof(size_t) * 3, | ||
| compileWorkGroupSize, | ||
| NULL); | ||
|
|
||
| _kernel_name = kernelName; | ||
|
|
||
| } | ||
|
|
||
| int KernelLauncher::countArgs() | ||
| { | ||
| int ret=0; | ||
|
|
||
| for(int i=0; i<_numArgs; i++) | ||
| if(_argListData[i]) | ||
| ret++; | ||
|
|
||
| return ret; | ||
| } | ||
|
|
||
| float KernelLauncher::run(bool profile, bool block) | ||
| { | ||
| double total = 0; | ||
| cl_int status = CL_SUCCESS; | ||
| cl_event execEvent = NULL; | ||
|
|
||
| status = clEnqueueNDRangeKernel(*_pQueue, *_pKernel, _dimensions, | ||
| NULL, _globalWorkSize, _localWorkSize, 0, | ||
| NULL, (profile? &execEvent: NULL)); | ||
|
|
||
| if (status == CL_SUCCESS && block) | ||
| clFinish(*_pQueue); | ||
|
|
||
| if (profile && status == CL_SUCCESS) { | ||
|
|
||
| clWaitForEvents(1, &execEvent); | ||
| long long start, end; | ||
| status = clGetEventProfilingInfo(execEvent, CL_PROFILING_COMMAND_START, | ||
| sizeof(start), &start, NULL); | ||
| status = clGetEventProfilingInfo(execEvent, CL_PROFILING_COMMAND_END, | ||
| sizeof(end), &end, NULL); | ||
|
|
||
| total = (double)(end - start) / 1e6; /* Convert nanoseconds to msecs */ | ||
| printf("Total kernel time was {%5.3f} msecs - %s \n", total, _kernel_name.c_str()); | ||
|
|
||
| clReleaseEvent(execEvent); | ||
| } | ||
|
|
||
| if (status != CL_SUCCESS) { | ||
|
|
||
| char * kernelName = (char*)_kernel_name.c_str(); | ||
| int z = 0; | ||
| z++; | ||
| } | ||
|
|
||
| //DEBUG_CL(status); | ||
| return (float)total; | ||
| } | ||
| KernelLauncher& KernelLauncher::global(const int g) { | ||
| if (_dimensions == -1) _dimensions = 1; | ||
| else if (_dimensions != 1) { | ||
| std::cerr << "Work group dimension incoherence" << std::endl; | ||
| } | ||
| _globalWorkSize[0] = g; | ||
| return *this; | ||
| } | ||
|
|
||
| KernelLauncher& KernelLauncher::global(const int gx, const int gy) { | ||
| if (_dimensions == -1) _dimensions = 2; | ||
| else if (_dimensions != 2) { | ||
| std:: cerr << "Work group dimension incoherence" << std::endl; | ||
| } | ||
| _globalWorkSize[0] = gx; | ||
| _globalWorkSize[1] = gy; | ||
| return *this; | ||
| } | ||
|
|
||
| KernelLauncher& KernelLauncher::global(const int gx, const int gy, const int gz) { | ||
| if (_dimensions == -1) _dimensions = 3; | ||
| else if (_dimensions != 3) { | ||
| std::cerr << "Work group dimension incoherence" << std::endl; | ||
| } | ||
| _globalWorkSize[0] = gx; | ||
| _globalWorkSize[1] = gy; | ||
| _globalWorkSize[2] = gz; | ||
| return *this; | ||
| } | ||
|
|
||
| KernelLauncher& KernelLauncher::local(const int l) { | ||
| if (_dimensions == -1) _dimensions = 1; | ||
| else if (_dimensions != 1) { | ||
| std::cerr << "Work group dimension incoherence" << std::endl; | ||
| } | ||
| _localWorkSize[0] = l; | ||
| return *this; | ||
| } | ||
|
|
||
| KernelLauncher& KernelLauncher::local(const int lx, const int ly) { | ||
| if (_dimensions == -1) _dimensions = 2; | ||
| else if (_dimensions != 2) { | ||
| std::cerr << "Work group dimension incoherence" << std::endl; | ||
| } | ||
| _localWorkSize[0] = lx; | ||
| _localWorkSize[1] = ly; | ||
| return *this; | ||
| } | ||
|
|
||
| KernelLauncher& KernelLauncher::local(const int lx, const int ly, const int lz) { | ||
| if (_dimensions == -1) _dimensions = 3; | ||
| else if (_dimensions != 3) { | ||
| std::cerr << "Work group dimension incoherence" << std::endl; | ||
| } | ||
| _localWorkSize[0] = lx; | ||
| _localWorkSize[1] = ly; | ||
| _localWorkSize[2] = lz; | ||
| return *this; | ||
| } | ||
|
|
||
| ////////////////////////// | ||
| KernelLauncher* KernelLauncher::pGlobal(const int g) { | ||
| if (_dimensions == -1) _dimensions = 1; | ||
| else if (_dimensions != 1) { | ||
| std::cerr << "Work group dimension incoherence" << std::endl; | ||
| } | ||
| _globalWorkSize[0] = g; | ||
| return this; | ||
| } | ||
|
|
||
| KernelLauncher* KernelLauncher::pGlobal(const int gx, const int gy) { | ||
| if (_dimensions == -1) _dimensions = 2; | ||
| else if (_dimensions != 2) { | ||
| std::cerr << "Work group dimension incoherence" << std::endl; | ||
| } | ||
| _globalWorkSize[0] = gx; | ||
| _globalWorkSize[1] = gy; | ||
| return this; | ||
| } | ||
|
|
||
| KernelLauncher* KernelLauncher::pGlobal(const int gx, const int gy, const int gz) { | ||
| if (_dimensions == -1) _dimensions = 3; | ||
| else if (_dimensions != 3) { | ||
| std::cerr << "Work group dimension incoherence" << std::endl; | ||
| } | ||
| _globalWorkSize[0] = gx; | ||
| _globalWorkSize[1] = gy; | ||
| _globalWorkSize[2] = gz; | ||
| return this; | ||
| } | ||
|
|
||
| KernelLauncher* KernelLauncher::pLocal(const int l) { | ||
| if (_dimensions == -1) _dimensions = 1; | ||
| else if (_dimensions != 1) { | ||
| std::cerr << "Work group dimension incoherence" << std::endl; | ||
| } | ||
| _localWorkSize[0] = l; | ||
| return this; | ||
| } | ||
|
|
||
| KernelLauncher* KernelLauncher::pLocal(const int lx, const int ly) { | ||
| if (_dimensions == -1) _dimensions = 2; | ||
| else if (_dimensions != 2) { | ||
| std::cerr << "Work group dimension incoherence" << std::endl; | ||
| } | ||
| _localWorkSize[0] = lx; | ||
| _localWorkSize[1] = ly; | ||
| return this; | ||
| } | ||
|
|
||
| KernelLauncher* KernelLauncher::pLocal(const int lx, const int ly, const int lz) { | ||
| if (_dimensions == -1) _dimensions = 3; | ||
| else if (_dimensions != 3) { | ||
| std::cerr << "Work group dimension incoherence" << std::endl; | ||
| } | ||
| _localWorkSize[0] = lx; | ||
| _localWorkSize[1] = ly; | ||
| _localWorkSize[2] = lz; | ||
| return this; | ||
| } | ||
|
|
| @@ -0,0 +1,142 @@ | ||
| #include "include/program.h" | ||
| #include "include/oclUtils.h" | ||
|
|
||
|
|
||
| int ReadSourceFromFile(const char* fileName, char** source, size_t* sourceSize) | ||
| { | ||
| int errorCode = CL_SUCCESS; | ||
|
|
||
| FILE* fp = fopen(fileName, "rb"); | ||
| if (fp == NULL) | ||
| { | ||
| printf("Error: Couldn't find program source file '%s'.\n", fileName); | ||
| errorCode = CL_INVALID_VALUE; | ||
| } | ||
| else { | ||
| fseek(fp, 0, SEEK_END); | ||
| *sourceSize = ftell(fp); | ||
| fseek(fp, 0, SEEK_SET); | ||
|
|
||
| *source = new char[*sourceSize]; | ||
| if (*source == NULL) | ||
| { | ||
| printf("Error: Couldn't allocate %d bytes for program source from file '%s'.\n", (int)(*sourceSize), fileName); | ||
| errorCode = CL_OUT_OF_HOST_MEMORY; | ||
| } | ||
| else { | ||
| fread(*source, 1, *sourceSize, fp); | ||
| } | ||
| } | ||
| return errorCode; | ||
| } | ||
|
|
||
|
|
||
| void checkErr( cl_int err,int line, const char *n, bool verbosity=false ) { | ||
| if( err != CL_SUCCESS ) { | ||
| std::cerr << n << "\r\t\t\t\t\t\tline:" << line<<" "<<oclErrorString(err) << std::endl; | ||
| //assert(0); | ||
| } | ||
| else if( n != NULL ) { | ||
| if( verbosity) std::cerr << n << "\r\t\t\t\t\t\t" << "OK" <<std::endl; | ||
|
|
||
| } | ||
| } | ||
|
|
||
| Program::Program(std::vector<std::string> &kernelFilePath, cl_context *context, cl_command_queue *queue, cl_device_id *device) | ||
| //Program::Program(std::string &kernelFilePath, cl_context *context, cl_command_queue *queue, cl_device_id *device) | ||
| { | ||
| this->_pContext = context; | ||
| this->_pQueue = queue; | ||
| this->_pDeviceID = device; | ||
|
|
||
| std::ifstream programFile(kernelFilePath[0].c_str()); | ||
| //std::ifstream programFile(kernelFilePath.c_str()); | ||
| std::string programBuffer(std::istreambuf_iterator<char>(programFile), | ||
| (std::istreambuf_iterator<char>())); | ||
| if(programBuffer.empty()) | ||
| { | ||
| std::cout<<"Kernel File Not Found in specified location!"<<std::endl; | ||
| } | ||
| size_t programSize = programBuffer.size(); | ||
|
|
||
|
|
||
| char* source = NULL; | ||
| size_t src_size = 0; | ||
| cl_int err = CL_SUCCESS; | ||
| err = ReadSourceFromFile(kernelFilePath[0].c_str(), &source, &src_size); | ||
|
|
||
| //_program = clCreateProgramWithSource((*_pContext), 1,(const char **)&programBuffer, &programSize, &_status); | ||
| _program = clCreateProgramWithSource((*_pContext), 1, (const char **)&source, &src_size, &_status); | ||
| //DEBUG_CL(_status); | ||
| checkErr(_status, __LINE__,"clCreateProgramWithSource"); | ||
| if( _status != CL_SUCCESS ) | ||
| printf("clCreateProgramWithSource ERROR - %s\n", oclErrorString(_status)); | ||
| else | ||
| printf("clCreateProgramWithSource success\n"); | ||
|
|
||
|
|
||
|
|
||
| _buildState = false; | ||
| } | ||
|
|
||
|
|
||
| void Program::buildProgram() | ||
| { | ||
| char *programLog; | ||
| size_t programLogSize; | ||
| //const char options[] = "-cl-std=CL1.0 -cl-mad-enable -Werror"; | ||
| //const char options[] = "-cl-mad-enable -Werror"; | ||
| const char options[] = "-cl-mad-enable -cl-unsafe-math-optimizations -cl-fast-relaxed-math -cl-single-precision-constant -cl-no-signed-zeros"; | ||
| _status= clBuildProgram(_program, 1, _pDeviceID, options, NULL, NULL); | ||
| if(_status<0) | ||
| { | ||
| clGetProgramBuildInfo(_program, *_pDeviceID, CL_PROGRAM_BUILD_LOG, 0, NULL, &programLogSize ); | ||
| programLog = (char*)malloc(sizeof(char)*programLogSize+1); | ||
| clGetProgramBuildInfo(_program, *_pDeviceID, CL_PROGRAM_BUILD_LOG, programLogSize+1, programLog, NULL); | ||
| printf("\nBuild Log :%s\n",programLog); | ||
| free(programLog); | ||
| // exit(0); ///!TODO: Custom Code | ||
| } | ||
| DEBUG_CL(_status); | ||
| if( _status != CL_SUCCESS ) | ||
| printf("clGetProgramBuildInfo() ERROR - %s\n", oclErrorString(_status)); | ||
| else | ||
| printf("clGetProgramBuildInfo() success\n"); | ||
|
|
||
| //_kernel = clCreateKernel(_program, kernelName.c_str(), &_status); | ||
| //DEBUG_CL(_status); | ||
| // Creates the kernels | ||
| // Needs to verify if the file compiled is actually a kernel | ||
| _status = clCreateKernelsInProgram(_program, 0, NULL, (cl_uint*)&(_numKernels)); | ||
| cl_kernel* k = new cl_kernel[_numKernels]; | ||
| _status = clCreateKernelsInProgram(_program, _numKernels, k, NULL); | ||
| DEBUG_CL(_status); | ||
|
|
||
| // Creates the hash with the kernels | ||
| for (int i = 0; i < _numKernels; i++) { | ||
| char name[256]; | ||
| _status = clGetKernelInfo(k[i], CL_KERNEL_FUNCTION_NAME, sizeof(char)*256, (void*) name, NULL); | ||
| //DEBUG_CL(_status); | ||
| if( _status != CL_SUCCESS ) | ||
| printf("buildProgram ERROR - %s\n", oclErrorString(_status)); | ||
| else | ||
| printf("buildProgram kernels success\n"); | ||
| _kernels[name] = k[i]; | ||
| printf("Kernel No: %d, name - %s\n", i+1, name); | ||
| //DEBUG_VALUE("Kernel Name: ", name); | ||
| } | ||
|
|
||
| _buildState = true; | ||
|
|
||
| } | ||
|
|
||
| KernelLauncher* Program::createKernelLauncher(std::string kernelName) | ||
| { | ||
| if(!_buildState) | ||
| ERROR_PRINT_STRING("You forgot to build the kernel"); | ||
|
|
||
| /// @TIPS: Always use a pointer to an variable that needs to be returned | ||
| KernelLauncher *kl = new KernelLauncher(*this->_pDeviceID, &_kernels[kernelName], _pQueue, kernelName); | ||
| return kl; | ||
| } | ||
|
|
| @@ -0,0 +1,21 @@ | ||
| #include "include/sampler.h" | ||
| #include "include/iv_common.h" | ||
|
|
||
| Sampler::Sampler(cl_context* context, cl_bool normalizedCoords, cl_addressing_mode addrMode, cl_filter_mode filterMode) | ||
| { | ||
| // clCreateSampler(cl_context /* context */, | ||
| // cl_bool /* normalized_coords */, | ||
| // cl_addressing_mode /* addressing_mode */, | ||
| // cl_filter_mode /* filter_mode */, | ||
| // cl_int * /* errcode_ret */) | ||
| // Create the image sampler | ||
| cl_int status; | ||
| _sampler = clCreateSampler(*context, normalizedCoords, | ||
| addrMode, filterMode, &status); | ||
| DEBUG_CL(status); | ||
| } | ||
|
|
||
| cl_sampler& Sampler::getSampler() | ||
| { | ||
| return _sampler; | ||
| } |
| @@ -0,0 +1,28 @@ | ||
| #pragma once | ||
| #ifndef OCL_H | ||
| #define OCL_H | ||
|
|
||
| #ifdef OPENCL | ||
| #include "CL/cl.h" | ||
|
|
||
|
|
||
| //#include "additionally.h" | ||
|
|
||
| #ifdef __cplusplus | ||
| extern "C" { | ||
| #endif | ||
|
|
||
| bool ocl_initialize(); | ||
| void ocl_push_array(cl_mem x_gpu, float *x, size_t n); | ||
| cl_mem ocl_make_array(float *x, size_t n); | ||
| cl_mem ocl_make_int_array(size_t n); | ||
| void ocl_push_convolutional_layer(convolutional_layer layer); | ||
|
|
||
|
|
||
| #ifdef __cplusplus | ||
| } | ||
| #endif | ||
|
|
||
| #endif // OPENCL | ||
|
|
||
| #endif // OCL_H |
| @@ -0,0 +1,28 @@ | ||
|
|
||
| Microsoft Visual Studio Solution File, Format Version 12.00 | ||
| # Visual Studio 14 | ||
| VisualStudioVersion = 14.0.25420.1 | ||
| MinimumVisualStudioVersion = 10.0.40219.1 | ||
| Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "yolo_ocl", "yolo_ocl.vcxproj", "{58803E1F-DBC5-4332-87B4-FF90E6E1E2A8}" | ||
| EndProject | ||
| Global | ||
| GlobalSection(SolutionConfigurationPlatforms) = preSolution | ||
| Debug|x64 = Debug|x64 | ||
| Debug|x86 = Debug|x86 | ||
| Release|x64 = Release|x64 | ||
| Release|x86 = Release|x86 | ||
| EndGlobalSection | ||
| GlobalSection(ProjectConfigurationPlatforms) = postSolution | ||
| {58803E1F-DBC5-4332-87B4-FF90E6E1E2A8}.Debug|x64.ActiveCfg = Debug|x64 | ||
| {58803E1F-DBC5-4332-87B4-FF90E6E1E2A8}.Debug|x64.Build.0 = Debug|x64 | ||
| {58803E1F-DBC5-4332-87B4-FF90E6E1E2A8}.Debug|x86.ActiveCfg = Debug|Win32 | ||
| {58803E1F-DBC5-4332-87B4-FF90E6E1E2A8}.Debug|x86.Build.0 = Debug|Win32 | ||
| {58803E1F-DBC5-4332-87B4-FF90E6E1E2A8}.Release|x64.ActiveCfg = Release|x64 | ||
| {58803E1F-DBC5-4332-87B4-FF90E6E1E2A8}.Release|x64.Build.0 = Release|x64 | ||
| {58803E1F-DBC5-4332-87B4-FF90E6E1E2A8}.Release|x86.ActiveCfg = Release|Win32 | ||
| {58803E1F-DBC5-4332-87B4-FF90E6E1E2A8}.Release|x86.Build.0 = Release|Win32 | ||
| EndGlobalSection | ||
| GlobalSection(SolutionProperties) = preSolution | ||
| HideSolutionNode = FALSE | ||
| EndGlobalSection | ||
| EndGlobal |
| @@ -0,0 +1,157 @@ | ||
| <?xml version="1.0" encoding="utf-8"?> | ||
| <Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> | ||
| <ItemGroup Label="ProjectConfigurations"> | ||
| <ProjectConfiguration Include="Debug|Win32"> | ||
| <Configuration>Debug</Configuration> | ||
| <Platform>Win32</Platform> | ||
| </ProjectConfiguration> | ||
| <ProjectConfiguration Include="Release|Win32"> | ||
| <Configuration>Release</Configuration> | ||
| <Platform>Win32</Platform> | ||
| </ProjectConfiguration> | ||
| <ProjectConfiguration Include="Debug|x64"> | ||
| <Configuration>Debug</Configuration> | ||
| <Platform>x64</Platform> | ||
| </ProjectConfiguration> | ||
| <ProjectConfiguration Include="Release|x64"> | ||
| <Configuration>Release</Configuration> | ||
| <Platform>x64</Platform> | ||
| </ProjectConfiguration> | ||
| </ItemGroup> | ||
| <PropertyGroup Label="Globals"> | ||
| <ProjectGuid>{58803E1F-DBC5-4332-87B4-FF90E6E1E2A8}</ProjectGuid> | ||
| <RootNamespace>yolo_cpu</RootNamespace> | ||
| <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion> | ||
| </PropertyGroup> | ||
| <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> | ||
| <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> | ||
| <ConfigurationType>Application</ConfigurationType> | ||
| <UseDebugLibraries>true</UseDebugLibraries> | ||
| <PlatformToolset>v140</PlatformToolset> | ||
| <CharacterSet>MultiByte</CharacterSet> | ||
| </PropertyGroup> | ||
| <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> | ||
| <ConfigurationType>Application</ConfigurationType> | ||
| <UseDebugLibraries>false</UseDebugLibraries> | ||
| <PlatformToolset>v140</PlatformToolset> | ||
| <WholeProgramOptimization>true</WholeProgramOptimization> | ||
| <CharacterSet>MultiByte</CharacterSet> | ||
| </PropertyGroup> | ||
| <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration"> | ||
| <ConfigurationType>Application</ConfigurationType> | ||
| <UseDebugLibraries>true</UseDebugLibraries> | ||
| <PlatformToolset>v140</PlatformToolset> | ||
| <CharacterSet>MultiByte</CharacterSet> | ||
| </PropertyGroup> | ||
| <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration"> | ||
| <ConfigurationType>Application</ConfigurationType> | ||
| <UseDebugLibraries>false</UseDebugLibraries> | ||
| <PlatformToolset>v140</PlatformToolset> | ||
| <WholeProgramOptimization>true</WholeProgramOptimization> | ||
| <CharacterSet>MultiByte</CharacterSet> | ||
| </PropertyGroup> | ||
| <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> | ||
| <ImportGroup Label="ExtensionSettings"> | ||
| </ImportGroup> | ||
| <ImportGroup Label="Shared"> | ||
| </ImportGroup> | ||
| <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> | ||
| <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> | ||
| </ImportGroup> | ||
| <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> | ||
| <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> | ||
| </ImportGroup> | ||
| <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> | ||
| <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> | ||
| </ImportGroup> | ||
| <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> | ||
| <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> | ||
| </ImportGroup> | ||
| <PropertyGroup Label="UserMacros" /> | ||
| <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> | ||
| <OutDir>bin\</OutDir> | ||
| <IntDir>$(Platform)\ocl_$(Configuration)\</IntDir> | ||
| </PropertyGroup> | ||
| <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> | ||
| <OutDir>bin\</OutDir> | ||
| </PropertyGroup> | ||
| <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> | ||
| <ClCompile> | ||
| <WarningLevel>Level3</WarningLevel> | ||
| <Optimization>Disabled</Optimization> | ||
| <SDLCheck>true</SDLCheck> | ||
| </ClCompile> | ||
| </ItemDefinitionGroup> | ||
| <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> | ||
| <ClCompile> | ||
| <WarningLevel>Level3</WarningLevel> | ||
| <Optimization>Disabled</Optimization> | ||
| <SDLCheck>true</SDLCheck> | ||
| </ClCompile> | ||
| </ItemDefinitionGroup> | ||
| <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> | ||
| <ClCompile> | ||
| <WarningLevel>Level3</WarningLevel> | ||
| <Optimization>MaxSpeed</Optimization> | ||
| <FunctionLevelLinking>true</FunctionLevelLinking> | ||
| <IntrinsicFunctions>true</IntrinsicFunctions> | ||
| <SDLCheck>true</SDLCheck> | ||
| </ClCompile> | ||
| <Link> | ||
| <EnableCOMDATFolding>true</EnableCOMDATFolding> | ||
| <OptimizeReferences>true</OptimizeReferences> | ||
| </Link> | ||
| </ItemDefinitionGroup> | ||
| <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> | ||
| <ClCompile> | ||
| <WarningLevel>Level3</WarningLevel> | ||
| <Optimization>MaxSpeed</Optimization> | ||
| <FunctionLevelLinking>true</FunctionLevelLinking> | ||
| <IntrinsicFunctions>true</IntrinsicFunctions> | ||
| <SDLCheck>true</SDLCheck> | ||
| <AdditionalIncludeDirectories>C:\opencv_3.0\opencv\build\include;3rdparty\include;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0\include;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0\include;3rdparty\CLBlast\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> | ||
| <PreprocessorDefinitions>OPENCL;OPENCV;_TIMESPEC_DEFINED;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions> | ||
| <OpenMPSupport>true</OpenMPSupport> | ||
| <MultiProcessorCompilation>true</MultiProcessorCompilation> | ||
| </ClCompile> | ||
| <Link> | ||
| <EnableCOMDATFolding>true</EnableCOMDATFolding> | ||
| <OptimizeReferences>true</OptimizeReferences> | ||
| <AdditionalLibraryDirectories>C:\opencv_3.0\opencv\build\x64\vc14\lib;C:\opencv_2.4.13\opencv\build\x64\vc12\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories> | ||
| <AdditionalDependencies>3rdparty\lib\x64\pthreadVC2.lib;3rdparty/CLBlast/Release/clblast.lib;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v9.0\lib\x64\OpenCL.lib;%(AdditionalDependencies)</AdditionalDependencies> | ||
| </Link> | ||
| </ItemDefinitionGroup> | ||
| <ItemGroup> | ||
| <ClCompile Include="src\additionally.c" /> | ||
| <ClCompile Include="src\box.c" /> | ||
| <ClCompile Include="src\main.c" /> | ||
| <ClCompile Include="src\OpenCL\buffer.cpp" /> | ||
| <ClCompile Include="src\OpenCL\clutils.cpp" /> | ||
| <ClCompile Include="src\OpenCL\cl_wrapper.cpp" /> | ||
| <ClCompile Include="src\OpenCL\helpers.cpp" /> | ||
| <ClCompile Include="src\OpenCL\image2D.cpp" /> | ||
| <ClCompile Include="src\OpenCL\iv_common.cpp" /> | ||
| <ClCompile Include="src\OpenCL\kernel_launcher.cpp" /> | ||
| <ClCompile Include="src\OpenCL\OCLManager.cpp" /> | ||
| <ClCompile Include="src\OpenCL\oclUtils.cpp" /> | ||
| <ClCompile Include="src\OpenCL\program.cpp" /> | ||
| <ClCompile Include="src\OpenCL\sampler.cpp" /> | ||
| <ClCompile Include="src\OpenCL\shrUtils.cpp" /> | ||
| <ClCompile Include="src\yolov2_forward_network.c" /> | ||
| <ClCompile Include="src\yolov2_forward_network_ocl.cpp" /> | ||
| </ItemGroup> | ||
| <ItemGroup> | ||
| <ClInclude Include="src\additionally.h" /> | ||
| <ClInclude Include="src\box.h" /> | ||
| <ClInclude Include="src\ocl.h" /> | ||
| <ClInclude Include="src\OpenCL\include\buffer.h" /> | ||
| <ClInclude Include="src\stb_image.h" /> | ||
| <ClInclude Include="src\stb_image_write.h" /> | ||
| </ItemGroup> | ||
| <ItemGroup> | ||
| <None Include="bin\DeepNNFP32.cl" /> | ||
| </ItemGroup> | ||
| <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> | ||
| <ImportGroup Label="ExtensionTargets"> | ||
| </ImportGroup> | ||
| </Project> |