From 61a6180b95bd2093a63e867403f9f387efd6e52c Mon Sep 17 00:00:00 2001 From: Kevin Smith Date: Fri, 25 Mar 2011 11:27:41 -0400 Subject: [PATCH] Adding float buffer, Better Erlang API --- c_src/pcuda_buffer.h | 24 +++++++- c_src/pcuda_float_buffer.cpp | 111 +++++++++++++++++++++++++++++++++++ c_src/pcuda_ops.cu | 25 ++++++++ c_src/pcuda_ops.h | 6 ++ c_src/pteracuda_nifs.cpp | 15 +++++ src/pteracuda_buffer.erl | 62 +++++++++++++++++++ src/pteracuda_context.erl | 18 ++++++ src/pteracuda_demo.erl | 24 ++++++++ src/pteracuda_internals.hrl | 4 ++ src/pteracuda_nifs.erl | 54 +++++++++++++++++ 10 files changed, 342 insertions(+), 1 deletion(-) create mode 100644 c_src/pcuda_float_buffer.cpp create mode 100644 src/pteracuda_buffer.erl create mode 100644 src/pteracuda_context.erl create mode 100644 src/pteracuda_demo.erl create mode 100644 src/pteracuda_internals.hrl diff --git a/c_src/pcuda_buffer.h b/c_src/pcuda_buffer.h index 290bf86..95378a3 100644 --- a/c_src/pcuda_buffer.h +++ b/c_src/pcuda_buffer.h @@ -8,7 +8,8 @@ enum PCudaBufferTypes { BUF_TYPE_INTEGER, - BUF_TYPE_STRING + BUF_TYPE_STRING, + BUF_TYPE_FLOAT }; class PCudaBuffer { @@ -49,6 +50,27 @@ class PCudaIntBuffer : public PCudaBuffer { std::vector *data; }; +class PCudaFloatBuffer : public PCudaBuffer { +public: + PCudaFloatBuffer(); + virtual ~PCudaFloatBuffer(); + virtual unsigned int size(); + virtual PCudaBufferTypes type() { return BUF_TYPE_FLOAT; }; + virtual bool sort(); + virtual bool contains(ErlNifEnv *env, ERL_NIF_TERM rawTarget); + virtual ERL_NIF_TERM toErlTerms(ErlNifEnv *env); + virtual void write(ErlNifEnv *env, ERL_NIF_TERM data); + virtual void delete_at(unsigned long position); + virtual bool insert_at(unsigned long position, ErlNifEnv *env, ERL_NIF_TERM value); + virtual void clear(); + virtual bool copy(PCudaBuffer *src); + virtual ERL_NIF_TERM intersect(ErlNifEnv *env, PCudaBuffer *other); + virtual ERL_NIF_TERM minmax(ErlNifEnv *env); + +protected: + std::vector *data; +}; + class PCudaStringBuffer : public PCudaBuffer { public: PCudaStringBuffer(); diff --git a/c_src/pcuda_float_buffer.cpp b/c_src/pcuda_float_buffer.cpp new file mode 100644 index 0000000..323f432 --- /dev/null +++ b/c_src/pcuda_float_buffer.cpp @@ -0,0 +1,111 @@ +#include +#include "pcuda_buffer.h" +#include "pcuda_ops.h" + +PCudaFloatBuffer::PCudaFloatBuffer() { + this->data = new std::vector(); +} + +PCudaFloatBuffer::~PCudaFloatBuffer() { + delete this->data; +} + +unsigned int PCudaFloatBuffer::size() { + return this->data->size(); +} + +void PCudaFloatBuffer::write(ErlNifEnv *env, ERL_NIF_TERM data) { + ERL_NIF_TERM head; + double value; + + while (enif_get_list_cell(env, data, &head, &data)) { + if (enif_get_double(env, head, &value)) { + this->data->push_back(value); + } + } +} + +void PCudaFloatBuffer::delete_at(unsigned long position) { + std::vector::iterator iter = this->data->begin(); + for (unsigned long i = 0; i < position; i++) { + iter++; + } + this->data->erase(iter); +} + +bool PCudaFloatBuffer::insert_at(unsigned long position, ErlNifEnv *env, ERL_NIF_TERM rawValue) { + double value; + if (enif_get_double(env, rawValue, &value)) { + std::vector::iterator iter = this->data->begin(); + for (unsigned long i = 0; i < position; i++) { + iter++; + } + this->data->insert(iter, 1, value); + return true; + } + return false; +} + +bool PCudaFloatBuffer::sort() { + return pcuda_float_sort(this->data); +} + +bool PCudaFloatBuffer::contains(ErlNifEnv *env, ERL_NIF_TERM rawTarget) { + double target; + if (enif_get_double(env, rawTarget, &target)) { + return pcuda_float_binary_search(this->data, target); + } + else { + return false; + } +} + +ERL_NIF_TERM PCudaFloatBuffer::toErlTerms(ErlNifEnv *env) { + std::vector::iterator iter; + ERL_NIF_TERM retval = enif_make_list(env, 0); + if (this->data->size() > 0) { + for (iter = this->data->end(); iter != this->data->begin();) { + --iter; + retval = enif_make_list_cell(env, enif_make_double(env, *iter), retval); + } + } + return retval; +} + +void PCudaFloatBuffer::clear() { + this->data->clear(); +} + +bool PCudaFloatBuffer::copy(PCudaBuffer *src) { + if (src->type() == BUF_TYPE_FLOAT) { + PCudaFloatBuffer *source = (PCudaFloatBuffer *) src; + std::vector::iterator iter; + for (iter = source->data->begin(); iter != source->data->end(); ++iter) { + this->data->push_back(*iter); + } + return true; + } + return false; +} + +ERL_NIF_TERM PCudaFloatBuffer::intersect(ErlNifEnv *env, PCudaBuffer *otherBuffer) { + ERL_NIF_TERM retval = enif_make_list(env, 0); + std::vector intersection; + if (otherBuffer->type() == BUF_TYPE_FLOAT) { + PCudaFloatBuffer *other = (PCudaFloatBuffer *) otherBuffer; + pcuda_float_intersection(this->data, other->data, &intersection); + if (intersection.size() > 0) { + for (std::vector::iterator iter = intersection.end(); iter != intersection.begin();) { + --iter; + retval = enif_make_list_cell(env, enif_make_double(env, *iter), retval); + } + } + } + return retval; +} + +ERL_NIF_TERM PCudaFloatBuffer::minmax(ErlNifEnv *env) { + double minmax[2]; + pcuda_float_minmax(this->data, &minmax[0]); + return enif_make_tuple2(env, enif_make_long(env, minmax[0]), enif_make_long(env, minmax[1])); +} diff --git a/c_src/pcuda_ops.cu b/c_src/pcuda_ops.cu index d53fffa..e5c85d0 100644 --- a/c_src/pcuda_ops.cu +++ b/c_src/pcuda_ops.cu @@ -72,6 +72,13 @@ bool pcuda_integer_sort(std::vector *data) { return true; } +bool pcuda_float_sort(std::vector *data) { + thrust::device_vector device = *data; + thrust::sort(device.begin(), device.end()); + thrust::copy(device.begin(), device.end(), data->begin()); + return true; +} + bool pcuda_string_sort(std::vector *data) { printf("In pcuda_string_sort\n"); thrust::device_vector device; @@ -104,15 +111,33 @@ bool pcuda_integer_binary_search(std::vector *data, long target) { return thrust::binary_search(device.begin(), device.end(), target, thrust::less()); } +bool pcuda_float_binary_search(std::vector *data, double target) { + thrust::device_vector device = *data; + return thrust::binary_search(device.begin(), device.end(), target, thrust::less()); +} + void pcuda_integer_intersection(std::vector *first, std::vector *second, std::vector *intersection) { thrust::set_intersection(first->begin(), first->end(), second->begin(), second->end(), std::back_inserter(*intersection)); } +void pcuda_float_intersection(std::vector *first, std::vector *second, + std::vector *intersection) { + thrust::set_intersection(first->begin(), first->end(), + second->begin(), second->end(), std::back_inserter(*intersection)); +} + void pcuda_integer_minmax(std::vector *data, long *minmax) { thrust::pair::iterator, std::vector::iterator> result = thrust::minmax_element(data->begin(), data->end()); minmax[0] = *result.first; minmax[1] = *result.second; } + +void pcuda_float_minmax(std::vector *data, double *minmax) { + thrust::pair::iterator, + std::vector::iterator> result = thrust::minmax_element(data->begin(), data->end()); + minmax[0] = *result.first; + minmax[1] = *result.second; +} diff --git a/c_src/pcuda_ops.h b/c_src/pcuda_ops.h index 2b8d354..63f6686 100644 --- a/c_src/pcuda_ops.h +++ b/c_src/pcuda_ops.h @@ -9,5 +9,11 @@ bool pcuda_integer_binary_search(std::vector *data, long target); void pcuda_integer_intersection(std::vector *first, std::vector *second, std::vector *intersection); void pcuda_integer_minmax(std::vector *data, long *minmax); +bool pcuda_float_sort(std::vector *data); +bool pcuda_float_binary_search(std::vector *data, double target); +void pcuda_float_intersection(std::vector *first, std::vector *second, std::vector *intersection); +void pcuda_float_minmax(std::vector *data, double *minmax); + +// Work in progress bool pcuda_string_sort(std::vector *data); #endif diff --git a/c_src/pteracuda_nifs.cpp b/c_src/pteracuda_nifs.cpp index eb3d4b1..03805c7 100644 --- a/c_src/pteracuda_nifs.cpp +++ b/c_src/pteracuda_nifs.cpp @@ -35,6 +35,8 @@ extern "C" { ERL_NIF_TERM pteracuda_nifs_new_int_buffer(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); ERL_NIF_TERM pteracuda_nifs_new_string_buffer(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); + ERL_NIF_TERM pteracuda_nifs_new_float_buffer(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); + ERL_NIF_TERM pteracuda_nifs_destroy_buffer(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); ERL_NIF_TERM pteracuda_nifs_buffer_size(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); @@ -55,6 +57,7 @@ extern "C" { {"destroy_context", 1, pteracuda_nifs_destroy_context}, {"new_int_buffer", 0, pteracuda_nifs_new_int_buffer}, {"new_string_buffer", 0, pteracuda_nifs_new_string_buffer}, + {"new_float_buffer", 0, pteracuda_nifs_new_float_buffer}, {"destroy_buffer", 1, pteracuda_nifs_destroy_buffer}, {"buffer_size", 1, pteracuda_nifs_buffer_size}, {"write_buffer", 2, pteracuda_nifs_write_buffer}, @@ -170,6 +173,18 @@ ERL_NIF_TERM pteracuda_nifs_new_string_buffer(ErlNifEnv *env, int argc, const ER return enif_make_tuple2(env, ATOM_OK, res); } +ERL_NIF_TERM pteracuda_nifs_new_float_buffer(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + PCudaBufferRef *ref = (PCudaBufferRef *) enif_alloc_resource(pteracuda_buffer_resource, sizeof(PCudaBufferRef)); + if (!ref) { + return OOM_ERROR; + } + ref->buffer = new PCudaFloatBuffer(); + ref->destroyed = false; + ERL_NIF_TERM res = enif_make_resource(env, ref); + enif_release_resource(ref); + return enif_make_tuple2(env, ATOM_OK, res); +} + ERL_NIF_TERM pteracuda_nifs_destroy_buffer(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { PCudaBufferRef *ref; if (argc != 1 || !enif_get_resource(env, argv[0], pteracuda_buffer_resource, (void **) &ref)) { diff --git a/src/pteracuda_buffer.erl b/src/pteracuda_buffer.erl new file mode 100644 index 0000000..479defc --- /dev/null +++ b/src/pteracuda_buffer.erl @@ -0,0 +1,62 @@ +-module(pteracuda_buffer). + +-include("pteracuda_internals.hrl"). + +-export([new/1, + destroy/1, + size/1, + write/2, + read/1, + duplicate/1, + clear/1, + sort/2, + contains/3, + intersection/3, + minmax/2]). + +new(integer) -> + {ok, Buf} = pteracuda_nifs:new_int_buffer(), + {ok, #pc_buffer{type=integer, ref=Buf}}; +new(float) -> + {ok, Buf} = pteracuda_nifs:new_float_buffer(), + {ok, #pc_buffer{type=float, ref=Buf}}; +new(string) -> + {ok, Buf} = pteracuda_nifs:new_string_buffer(), + {ok, #pc_buffer{type=string, ref=Buf}}. + +destroy(#pc_buffer{ref=Ref}) -> + pteracuda_nifs:destroy_buffer(Ref), + ok. + +size(#pc_buffer{ref=Ref}) -> + pteracuda_nifs:buffer_size(Ref). + +write(#pc_buffer{ref=Ref, type=Type}, Data) when Type =:= integer orelse + Type =:= string orelse + Type =:= float -> + pteracuda_nifs:write_buffer(Ref, Data). + +read(#pc_buffer{ref=Ref}) -> + pteracuda_nifs:read_buffer(Ref). + +duplicate(#pc_buffer{ref=Ref, type=Type}) when Type =:= integer orelse + Type =:= string orelse + Type =:= float -> + {ok, OtherBuf} = new(Type), + pteracuda_nifs:copy_buffer(Ref, OtherBuf#pc_buffer.ref), + {ok, OtherBuf}. + +clear(#pc_buffer{ref=Ref}) -> + pteracuda_nifs:clear_buffer(Ref). + +sort(#pc_context{ref=Ctx}, #pc_buffer{ref=Buf}) -> + pteracuda_nifs:sort_buffer(Ctx, Buf). + +contains(#pc_context{ref=Ctx}, #pc_buffer{ref=Buf}, Value) -> + pteracuda_nifs:buffer_contains(Ctx, Buf, Value). + +intersection(#pc_context{ref=Ctx}, #pc_buffer{ref=Buf1}, #pc_buffer{ref=Buf2}) -> + pteracuda_nifs:buffer_intersection(Ctx, Buf1, Buf2). + +minmax(#pc_context{ref=Ctx}, #pc_buffer{ref=Buf}) -> + pteracuda_nifs:buffer_minmax(Ctx, Buf). diff --git a/src/pteracuda_context.erl b/src/pteracuda_context.erl new file mode 100644 index 0000000..b8bd02a --- /dev/null +++ b/src/pteracuda_context.erl @@ -0,0 +1,18 @@ +-module(pteracuda_context). + +-include("pteracuda_internals.hrl"). + +-export([new/0, + new/1, + destroy/1]). + +new() -> + {ok, Ctx} = pteracuda_nifs:new_context(), + {ok, #pc_context{ref=Ctx}}. + +new(Device) when is_integer(Device) -> + {ok, Ctx} = pteracuda_nifs:new_context(Device), + {ok, #pc_context{ref=Ctx}}. + +destroy(#pc_context{ref=Ctx}) -> + pteracuda_nifs:destroy_context(Ctx). diff --git a/src/pteracuda_demo.erl b/src/pteracuda_demo.erl new file mode 100644 index 0000000..e6ee0f5 --- /dev/null +++ b/src/pteracuda_demo.erl @@ -0,0 +1,24 @@ +-module(pteracuda_demo). + +-compile([export_all, + native]). + +start(N) -> + {T1, T2, T3} = erlang:now(), + random:seed(T1, T2, T3), + io:format("Generating test data: ~p~n", [N]), + D = [random:uniform(N) || _ <- lists:seq(1, N)], + io:format("Measuring performance "), + {Time1, _} = timer:tc(lists, sort, [D]), + io:format("."), + {ok, C} = pteracuda_context:new(), + {ok, B} = pteracuda_buffer:new(integer), + pteracuda_buffer:write(B, D), + {Time2, _} = timer:tc(pteracuda_demo, pteracuda_sort, [C, B, D]), + io:format(".~n"), + io:format("Erlang: ~pms, CUDA: ~pms~n", [Time1 / 1000, Time2 / 1000]). + +pteracuda_sort(C, B, D) -> + pteracuda_buffer:write(B, D), + pteracuda_buffer:sort(C, B), + pteracuda_buffer:read(B). diff --git a/src/pteracuda_internals.hrl b/src/pteracuda_internals.hrl new file mode 100644 index 0000000..bafb647 --- /dev/null +++ b/src/pteracuda_internals.hrl @@ -0,0 +1,4 @@ +-record(pc_buffer, {type, + ref}). + +-record(pc_context, {ref}). diff --git a/src/pteracuda_nifs.erl b/src/pteracuda_nifs.erl index 316cced..538be05 100644 --- a/src/pteracuda_nifs.erl +++ b/src/pteracuda_nifs.erl @@ -18,6 +18,7 @@ -export([new_int_buffer/0, new_string_buffer/0, + new_float_buffer/0, destroy_buffer/1, buffer_size/1]). @@ -48,6 +49,9 @@ new_int_buffer() -> new_string_buffer() -> ?MISSING_NIF. +new_float_buffer() -> + ?MISSING_NIF. + destroy_buffer(_Buffer) -> ?MISSING_NIF. @@ -101,12 +105,22 @@ create_destroy_test() -> {ok, Buf} = pteracuda_nifs:new_int_buffer(), ok = pteracuda_nifs:destroy_buffer(Buf). +create_destroy_float_test() -> + {ok, Buf} = pteracuda_nifs:new_float_buffer(), + ok = pteracuda_nifs:destroy_buffer(Buf). + create_write_destroy_test() -> {ok, Buf} = pteracuda_nifs:new_int_buffer(), pteracuda_nifs:write_buffer(Buf, [1,2,3,4,5]), {ok, 5} = pteracuda_nifs:buffer_size(Buf), ok = pteracuda_nifs:destroy_buffer(Buf). +create_write_destroy_float_test() -> + {ok, Buf} = pteracuda_nifs:new_float_buffer(), + pteracuda_nifs:write_buffer(Buf, [0.01, 0.002, 0.0003, 0.4, 1.5]), + {ok, 5} = pteracuda_nifs:buffer_size(Buf), + ok = pteracuda_nifs:destroy_buffer(Buf). + create_write_delete_test() -> {ok, Buf} = pteracuda_nifs:new_int_buffer(), ok = pteracuda_nifs:write_buffer(Buf, [1,2,3,4,5]), @@ -116,6 +130,15 @@ create_write_delete_test() -> {ok, [3,4,5]} = pteracuda_nifs:read_buffer(Buf), pteracuda_nifs:destroy_buffer(Buf). +create_write_delete_float_test() -> + {ok, Buf} = pteracuda_nifs:new_float_buffer(), + ok = pteracuda_nifs:write_buffer(Buf, [1.1,1.2,1.3,1.4,1.5]), + ok = pteracuda_nifs:buffer_delete(Buf, 1), + {ok, [1.1,1.3,1.4,1.5]} = pteracuda_nifs:read_buffer(Buf), + ok = pteracuda_nifs:buffer_delete(Buf, 0), + {ok, [1.3,1.4,1.5]} = pteracuda_nifs:read_buffer(Buf), + pteracuda_nifs:destroy_buffer(Buf). + insert_test() -> {ok, Buf} = pteracuda_nifs:new_int_buffer(), ok = pteracuda_nifs:buffer_insert(Buf, 0, 1), @@ -127,6 +150,17 @@ insert_test() -> {ok, [1,2,6,3,4,5]} = pteracuda_nifs:read_buffer(Buf), pteracuda_nifs:destroy_buffer(Buf). +insert_float_test() -> + {ok, Buf} = pteracuda_nifs:new_float_buffer(), + ok = pteracuda_nifs:buffer_insert(Buf, 0, 1.0), + error = pteracuda_nifs:buffer_insert(Buf, 5, 2.0), + {ok, [1.0]} = pteracuda_nifs:read_buffer(Buf), + ok = pteracuda_nifs:clear_buffer(Buf), + ok = pteracuda_nifs:write_buffer(Buf, [1.0,2.0,3.0,4.0,5.0]), + ok = pteracuda_nifs:buffer_insert(Buf, 2, 6.0), + {ok, [1.0,2.0,6.0,3.0,4.0,5.0]} = pteracuda_nifs:read_buffer(Buf), + pteracuda_nifs:destroy_buffer(Buf). + create_write_sort_destroy_test() -> {ok, Buf} = pteracuda_nifs:new_int_buffer(), {ok, Ctx} = pteracuda_nifs:new_context(), @@ -137,6 +171,16 @@ create_write_sort_destroy_test() -> ok = pteracuda_nifs:destroy_buffer(Buf), ok = pteracuda_nifs:destroy_context(Ctx). +create_write_sort_destroy_float_test() -> + {ok, Buf} = pteracuda_nifs:new_float_buffer(), + {ok, Ctx} = pteracuda_nifs:new_context(), + ok = pteracuda_nifs:write_buffer(Buf, [3.1,2.1,1.1,4.1,5.1]), + {ok, 5} = pteracuda_nifs:buffer_size(Buf), + ok = pteracuda_nifs:sort_buffer(Ctx, Buf), + {ok, [1.1,2.1,3.1,4.1,5.1]} = pteracuda_nifs:read_buffer(Buf), + ok = pteracuda_nifs:destroy_buffer(Buf), + ok = pteracuda_nifs:destroy_context(Ctx). + create_write_clear_test() -> {ok, Buf} = pteracuda_nifs:new_int_buffer(), ok = pteracuda_nifs:write_buffer(Buf, [3,2,1,4,5]), @@ -155,6 +199,16 @@ create_write_contains_test() -> ok = pteracuda_nifs:destroy_buffer(Buf), ok = pteracuda_nifs:destroy_context(Ctx). +create_write_contains_float_test() -> + {ok, Buf} = pteracuda_nifs:new_float_buffer(), + {ok, Ctx} = pteracuda_nifs:new_context(), + N = [X + 0.0001 || X <- lists:seq(1, 1000)], + ok = pteracuda_nifs:write_buffer(Buf, N), + true = pteracuda_nifs:buffer_contains(Ctx, Buf, 513.0001), + false = pteracuda_nifs:buffer_contains(Ctx, Buf, 1500.0), + ok = pteracuda_nifs:destroy_buffer(Buf), + ok = pteracuda_nifs:destroy_context(Ctx). + create_copy_test() -> {ok, Buf} = pteracuda_nifs:new_int_buffer(), ok = pteracuda_nifs:write_buffer(Buf, lists:seq(1, 1000)),