From 15d0ffaac2b543ee3afad7ebbca5ceb7bcfabaad Mon Sep 17 00:00:00 2001
From: "Dakota St. Laurent" <d.h.stlaurent@gmail.com>
Date: Tue, 7 Jul 2015 17:14:07 -0400
Subject: [PATCH] use system header files instead of local ones

---
 .gitignore          |  3 ---
 README.md           | 26 +++++++++++++++++++++-----
 example00/main.cpp  |  6 +++++-
 example01/README.md | 10 ----------
 example01/main.cpp  |  6 +++++-
 example02/main.cpp  | 13 ++++++++-----
 6 files changed, 39 insertions(+), 25 deletions(-)
diff --git a/.gitignore b/.gitignore
index d55fe9e..f99e19e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,3 @@
-# openCL C++ headers
-CL/
-
 # compiled files
 *.out
 
diff --git a/README.md b/README.md
index fb1f9d7..d9615f3 100644
--- a/README.md
+++ b/README.md
@@ -2,13 +2,29 @@
 here is my feeble attempt at learning OpenCL, please don't make fun of me too much :hamburger:
 
 ## Configuration
-This currently runs on OS X, and I'm using local header files instead of global header files because I'm unfamiliar with C++. Deal with it. Run the following in a terminal to set up:
+This code uses OpenCL 1.1 on an NVIDIA GPU.
+
+### Linux
+(Only tested on Ubuntu). For NVIDIA GPUs, I've installed the following packages: `nvidia-346 nvidia-346-dev nvidia-346-uvm nvidia-libopencl1-346 nvidia-modprobe nvidia-opencl-icd-346 nvidia-settings`. Since the `opencl-headers` package in the main repository is for OpenCL 1.2, you can get the OpenCL 1.1 header files from [here](http://packages.ubuntu.com/precise/opencl-headers).
+
+Then to compile:
+
+```
+g++ -std=c++0x main.cpp -o main.out -lOpenCL
+```
+
+### OS X
+OpenCL is installed on OS X by default, but since this code uses the C++ bindings, you'll need to get those too. Get the [official C++ bindings from the OpenCL registry](https://www.khronos.org/registry/cl/api/1.1/cl.hpp) and copy the header to the OpenCL framework directory, or do the following:
+
+```
+wget https://www.khronos.org/registry/cl/api/1.1/cl.hpp
+sudo cp cl.hpp /System/Library/Frameworks/OpenCL.framework/Headers/
+```
+
+To compile:
 
 ```
-git clone git@github.com:SaintDako/OpenCL-examples.git
-cd OpenCL-examples
-mkdir CL
-curl https://www.khronos.org/registry/cl/api/1.2/cl.hpp -o CL/cl.hpp
+clang++ -std=c++0x -framework OpenCL main.cpp -o main.out
 ```
 
 ## example 00
diff --git a/example00/main.cpp b/example00/main.cpp
index 17c6e82..650cfda 100644
--- a/example00/main.cpp
+++ b/example00/main.cpp
@@ -1,5 +1,9 @@
 #include <iostream>
-#include "../CL/cl.hpp"
+#ifdef __APPLE__
+    #include <OpenCL/cl.hpp>
+#else
+    #include <CL/cl.hpp>
+#endif
 
 int main() {
     // get all platforms (drivers), e.g. NVIDIA
diff --git a/example01/README.md b/example01/README.md
index 230871b..d922540 100644
--- a/example01/README.md
+++ b/example01/README.md
@@ -1,16 +1,6 @@
 # Example 01
 This example compares the timings of adding vectors on the CPU versus adding vectors on the GPU, the latter of which has different implementations.
 
-## Compiling
-
-```
-clang++ -std=c++0x -framework OpenCL main.cpp -o main.out
-```
-
-To ignore deprecation warnings, add the flag `-Wno-deprecated-declarations`.
-
-Run from this directory, as a relative path is used for the OpenCL header file (for now).
-
 ## About
 The code runs the following implementations of adding large vectors (131072 elements; 8 * 32 * 512). The vectors are added together 10000 times.
 
diff --git a/example01/main.cpp b/example01/main.cpp
index b523c7a..bc01ebc 100644
--- a/example01/main.cpp
+++ b/example01/main.cpp
@@ -1,6 +1,10 @@
 #include <iostream>
 #include <ctime>
-#include "../CL/cl.hpp"
+#ifdef __APPLE__
+    #include <OpenCL/cl.hpp>
+#else
+    #include <CL/cl.hpp>
+#endif
 
 #define NUM_GLOBAL_WITEMS 1024
 
diff --git a/example02/main.cpp b/example02/main.cpp
index 725a76e..a099813 100644
--- a/example02/main.cpp
+++ b/example02/main.cpp
@@ -1,7 +1,11 @@
 #include <iostream>
 #include <algorithm>
 #include <iterator>
-#include "../CL/cl.hpp"
+#ifdef __APPLE__
+    #include <OpenCL/cl.hpp>
+#else
+    #include <CL/cl.hpp>
+#endif
 
 using namespace std;
 using namespace cl;
@@ -25,7 +29,7 @@ Platform getPlatform() {
 }
 
 
-Device getDevice(cl::Platform platform, int i, bool display=false) {
+Device getDevice(Platform platform, int i, bool display=false) {
     /* Returns the deviced specified by the index i on platform.
      * If display is true, then all of the platforms are listed.
      */
@@ -59,7 +63,6 @@ int main() {
     Context context({default_device});
     Program::Sources sources;
 
-    // calculates for each element; C = A + B
     std::string kernel_code=
         "void kernel multiply_by(global int* A, const int c) {"
         "   A[get_global_id(0)] = c * A[get_global_id(0)];"
@@ -76,12 +79,12 @@ int main() {
     CommandQueue queue(context, default_device);
     queue.enqueueWriteBuffer(buffer_A, CL_TRUE, 0, sizeof(int)*n, A);
 
-    Kernel multiply_by = cl::Kernel(program, "multiply_by");
+    Kernel multiply_by = Kernel(program, "multiply_by");
     multiply_by.setArg(0, buffer_A);
 
     for (int c=2; c<=c_max; c++) {
         multiply_by.setArg(1, c);
-        queue.enqueueNDRangeKernel(multiply_by, cl::NullRange, cl::NDRange(n), cl::NDRange(32));
+        queue.enqueueNDRangeKernel(multiply_by, NullRange, NDRange(n), NDRange(32));
     }
 
     queue.enqueueReadBuffer(buffer_A, CL_TRUE, 0, sizeof(int)*n, B);