From 15d0ffaac2b543ee3afad7ebbca5ceb7bcfabaad Mon Sep 17 00:00:00 2001 From: "Dakota St. Laurent" Date: Tue, 7 Jul 2015 17:14:07 -0400 Subject: [PATCH] use system header files instead of local ones --- .gitignore | 3 --- README.md | 26 +++++++++++++++++++++----- example00/main.cpp | 6 +++++- example01/README.md | 10 ---------- example01/main.cpp | 6 +++++- example02/main.cpp | 13 ++++++++----- 6 files changed, 39 insertions(+), 25 deletions(-) diff --git a/.gitignore b/.gitignore index d55fe9e..f99e19e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,3 @@ -# openCL C++ headers -CL/ - # compiled files *.out diff --git a/README.md b/README.md index fb1f9d7..d9615f3 100644 --- a/README.md +++ b/README.md @@ -2,13 +2,29 @@ here is my feeble attempt at learning OpenCL, please don't make fun of me too much :hamburger: ## Configuration -This currently runs on OS X, and I'm using local header files instead of global header files because I'm unfamiliar with C++. Deal with it. Run the following in a terminal to set up: +This code uses OpenCL 1.1 on a NVIDIA GPU. + +### Linux +(Only tested on Ubuntu). For NVIDIA GPUs, I've installed the following packages: `nvidia-346 nvidia-346-dev nvidia-346-uvm nvidia-libopencl1-346 nvidia-modprobe nvidia-opencl-icd-346 nvidia-settings`. Since the `opencl-headers` package in the main repository is for OpenCL 1.2, you can get the OpenCL 1.1 header files from [here](http://packages.ubuntu.com/precise/opencl-headers). + +Then to compile: + +``` +g++ -std=c++0x main.cpp -o main.out -lOpenCL +``` + +### OS X +OpenCL is installed on OS X by default, but since this code uses the C++ bindings, you'll need to get that too. 
Get the [official C++ bindings from the OpenCL registry](https://www.khronos.org/registry/cl/api/1.1/cl.hpp) and copy it to the OpenCL framework directory, or do the following: + +``` +wget https://www.khronos.org/registry/cl/api/1.1/cl.hpp +sudo cp cl.hpp /System/Library/Frameworks/OpenCL.framework/Headers/ +``` + +To compile: ``` -git clone git@github.com:SaintDako/OpenCL-examples.git -cd OpenCL-examples -mkdir CL -curl https://www.khronos.org/registry/cl/api/1.2/cl.hpp -o CL/cl.hpp +clang++ -std=c++0x -framework OpenCL main.cpp -o main.out ``` ## example 00 diff --git a/example00/main.cpp b/example00/main.cpp index 17c6e82..650cfda 100644 --- a/example00/main.cpp +++ b/example00/main.cpp @@ -1,5 +1,9 @@ #include -#include "../CL/cl.hpp" +#ifdef __APPLE__ + #include +#else + #include +#endif int main() { // get all platforms (drivers), e.g. NVIDIA diff --git a/example01/README.md b/example01/README.md index 230871b..d922540 100644 --- a/example01/README.md +++ b/example01/README.md @@ -1,16 +1,6 @@ # Example 01 This example compares the timings of adding vectors on the CPU versus adding vectors on the GPU, the latter of which has different implementations. -## Compiling - -``` -clang++ -std=c++0x -framework OpenCL main.cpp -o main.out -``` - -To ignore deprecation warnings, add the flag `-Wno-deprecated-declarations`. - -Run from this directory, as a relative path is used for the OpenCL header file (for now). - ## About The code runs the following implementations of adding large vectors (131072 elements; 8 * 32 * 512). The vectors are added together 10000 times. 
diff --git a/example01/main.cpp b/example01/main.cpp index b523c7a..bc01ebc 100644 --- a/example01/main.cpp +++ b/example01/main.cpp @@ -1,6 +1,10 @@ #include #include -#include "../CL/cl.hpp" +#ifdef __APPLE__ + #include +#else + #include +#endif #define NUM_GLOBAL_WITEMS 1024 diff --git a/example02/main.cpp b/example02/main.cpp index 725a76e..a099813 100644 --- a/example02/main.cpp +++ b/example02/main.cpp @@ -1,7 +1,11 @@ #include #include #include -#include "../CL/cl.hpp" +#ifdef __APPLE__ + #include +#else + #include +#endif using namespace std; using namespace cl; @@ -25,7 +29,7 @@ Platform getPlatform() { } -Device getDevice(cl::Platform platform, int i, bool display=false) { +Device getDevice(Platform platform, int i, bool display=false) { /* Returns the deviced specified by the index i on platform. * If display is true, then all of the platforms are listed. */ @@ -59,7 +63,6 @@ int main() { Context context({default_device}); Program::Sources sources; - // calculates for each element; C = A + B std::string kernel_code= "void kernel multiply_by(global int* A, const int c) {" " A[get_global_id(0)] = c * A[get_global_id(0)];" @@ -76,12 +79,12 @@ int main() { CommandQueue queue(context, default_device); queue.enqueueWriteBuffer(buffer_A, CL_TRUE, 0, sizeof(int)*n, A); - Kernel multiply_by = cl::Kernel(program, "multiply_by"); + Kernel multiply_by = Kernel(program, "multiply_by"); multiply_by.setArg(0, buffer_A); for (int c=2; c<=c_max; c++) { multiply_by.setArg(1, c); - queue.enqueueNDRangeKernel(multiply_by, cl::NullRange, cl::NDRange(n), cl::NDRange(32)); + queue.enqueueNDRangeKernel(multiply_by, NullRange, NDRange(n), NDRange(32)); } queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, sizeof(int)*n, B);