Skip to content
This repository has been archived by the owner on Feb 2, 2024. It is now read-only.

Commit

Permalink
Merge 68caf58 into 42c10cb
Browse files Browse the repository at this point in the history
  • Loading branch information
shssf committed Aug 9, 2019
2 parents 42c10cb + 68caf58 commit af8b6e8
Show file tree
Hide file tree
Showing 11 changed files with 873 additions and 559 deletions.
153 changes: 82 additions & 71 deletions hpat/_daal.cpp
Original file line number Diff line number Diff line change
@@ -1,86 +1,97 @@
#include <Python.h>
#include "daal.h"
#include <unordered_set>

#include "daal.h"

using namespace std;
using namespace daal;
using namespace daal::algorithms;
using namespace daal::data_management;
#define mpi_root 0

struct svc_payload {
struct svc_payload
{
services::SharedPtr<multi_class_classifier::training::Result>* trainingResultPtr;
int64_t n_classes;
};

struct mnb_payload {
struct mnb_payload
{
services::SharedPtr<multinomial_naive_bayes::training::Result>* trainingResultPtr;
int64_t n_classes;
};

// SVC (multi-class SVM) entry points exposed to Python as raw function pointers.
void* svc_train(int64_t num_features, int64_t num_samples, double* X, double* y, int64_t* n_classes_ptr);
void svc_predict(void* model_ptr, int64_t num_features, int64_t num_samples, double* p, double* res, int64_t n_classes);
void dtor_svc(void* model_ptr, int64_t size, void* in);

// Multinomial naive Bayes entry points exposed to Python as raw function pointers.
void* mnb_train(int64_t num_features, int64_t num_samples, int* X, int* y, int64_t* n_classes_ptr);
void mnb_predict(void* model_ptr, int64_t num_features, int64_t num_samples, int* p, int* res, int64_t n_classes);
void dtor_mnb(void* model_ptr, int64_t size, void* in);

// Module init for "daal_wrapper": creates the module and publishes the native
// train/predict/dtor entry points as integer attributes (function pointers
// boxed with PyLong_FromVoidPtr) so Numba-compiled code can call them directly.
PyMODINIT_FUNC PyInit_daal_wrapper(void)
{
    PyObject* m;
    static struct PyModuleDef moduledef = {
        PyModuleDef_HEAD_INIT,
        "daal_wrapper",
        "No docs",
        -1,
        NULL,
    };

    m = PyModule_Create(&moduledef);
    if (m == NULL)
    {
        return NULL;
    }

    PyObject_SetAttrString(m, "svc_train", PyLong_FromVoidPtr((void*)(&svc_train)));
    PyObject_SetAttrString(m, "svc_predict", PyLong_FromVoidPtr((void*)(&svc_predict)));
    PyObject_SetAttrString(m, "dtor_svc", PyLong_FromVoidPtr((void*)(&dtor_svc)));
    PyObject_SetAttrString(m, "mnb_train", PyLong_FromVoidPtr((void*)(&mnb_train)));
    PyObject_SetAttrString(m, "mnb_predict", PyLong_FromVoidPtr((void*)(&mnb_predict)));
    PyObject_SetAttrString(m, "dtor_mnb", PyLong_FromVoidPtr((void*)(&dtor_mnb)));

    return m;
}

// Count the number of distinct label values in y[0..num_samples).
// Used by the training entry points when the caller passes n_classes == -1.
// Returns 0 for an empty sample set.
template <class T>
int64_t get_num_classes(T* y, int64_t num_samples)
{
    std::unordered_set<T> vals;
    for (int64_t i = 0; i < num_samples; i++)
    {
        vals.insert(y[i]);
    }

    return vals.size();
}

void* svc_train(int64_t num_features, int64_t num_samples, double* X, double *y, int64_t *n_classes_ptr)
void* svc_train(int64_t num_features, int64_t num_samples, double* X, double* y, int64_t* n_classes_ptr)
{
int64_t n_classes = *n_classes_ptr;
// if number of classes is not known, count label values and assign to ptr
// to update SVC data
if (n_classes==-1)
if (n_classes == -1)
{
n_classes = get_num_classes(y, num_samples);
}
*n_classes_ptr = n_classes;
// printf("svn_train nFeatures:%ld nSamples:%ld X[0]:%lf y[0]:%lf\n", num_features, num_samples, X[0], y[0]);
// printf("train classes: %lld\n", n_classes);
services::SharedPtr<svm::training::Batch<> > training(new svm::training::Batch<>());
services::SharedPtr<svm::training::Batch<>> training(new svm::training::Batch<>());
services::SharedPtr<multi_class_classifier::training::Result> trainingResult;
services::SharedPtr<svm::prediction::Batch<> > prediction(new svm::prediction::Batch<>());
services::SharedPtr<svm::prediction::Batch<>> prediction(new svm::prediction::Batch<>());
services::SharedPtr<kernel_function::KernelIface> kernel(new kernel_function::linear::Batch<>());
training->parameter.cacheSize = 100000000;
training->parameter.kernel = kernel;
prediction->parameter.kernel = kernel;

services::SharedPtr< HomogenNumericTable< double > > trainData =
HomogenNumericTable<double>::create(X, num_features, num_samples);
services::SharedPtr< HomogenNumericTable< double > > trainGroundTruth =
HomogenNumericTable<double>::create(y, 1, num_samples);
services::SharedPtr<HomogenNumericTable<double>> trainData =
HomogenNumericTable<double>::create(X, num_features, num_samples);
services::SharedPtr<HomogenNumericTable<double>> trainGroundTruth =
HomogenNumericTable<double>::create(y, 1, num_samples);
// printf("label rows: %ld\n", trainGroundTruth->getNumberOfRows());

multi_class_classifier::training::Batch<> algorithm;
Expand All @@ -94,31 +105,30 @@ void* svc_train(int64_t num_features, int64_t num_samples, double* X, double *y,

algorithm.compute();


trainingResult = algorithm.getResult();
// FIXME: return pointer to SharedPtr since get/set functions don't work
services::SharedPtr<multi_class_classifier::training::Result> * ptres =
services::SharedPtr<multi_class_classifier::training::Result>* ptres =
new services::SharedPtr<multi_class_classifier::training::Result>();
*ptres = trainingResult;
return ptres;
}

void svc_predict(void* model_ptr, int64_t num_features, int64_t num_samples, double* p, double *res, int64_t n_classes)
void svc_predict(void* model_ptr, int64_t num_features, int64_t num_samples, double* p, double* res, int64_t n_classes)
{
// printf("predict classes: %lld\n", n_classes);
services::SharedPtr<multi_class_classifier::training::Result>* trainingResultPtr =
(services::SharedPtr<multi_class_classifier::training::Result>*)(model_ptr);
services::SharedPtr<classifier::prediction::Result> predictionResult;
services::SharedPtr<svm::training::Batch<> > training(new svm::training::Batch<>());
services::SharedPtr<svm::prediction::Batch<> > prediction(new svm::prediction::Batch<>());
services::SharedPtr<svm::training::Batch<>> training(new svm::training::Batch<>());
services::SharedPtr<svm::prediction::Batch<>> prediction(new svm::prediction::Batch<>());
services::SharedPtr<kernel_function::KernelIface> kernel(new kernel_function::linear::Batch<>());

training->parameter.cacheSize = 100000000;
training->parameter.kernel = kernel;
prediction->parameter.kernel = kernel;

services::SharedPtr< HomogenNumericTable< double > > testData =
HomogenNumericTable<double>::create(p, num_features, num_samples);
services::SharedPtr<HomogenNumericTable<double>> testData =
HomogenNumericTable<double>::create(p, num_features, num_samples);

multi_class_classifier::prediction::Batch<> algorithm;

Expand All @@ -127,51 +137,49 @@ void svc_predict(void* model_ptr, int64_t num_features, int64_t num_samples, dou
algorithm.parameter.prediction = prediction;

algorithm.input.set(classifier::prediction::data, testData);
algorithm.input.set(classifier::prediction::model,
(*trainingResultPtr)->get(classifier::training::model));
algorithm.input.set(classifier::prediction::model, (*trainingResultPtr)->get(classifier::training::model));

algorithm.compute();

predictionResult = algorithm.getResult();
NumericTablePtr res_table = predictionResult->get(classifier::prediction::prediction);
BlockDescriptor<double> block1;
res_table->getBlockOfRows(0, num_samples, readOnly, block1);
double *data_ptr = block1.getBlockPtr();
double* data_ptr = block1.getBlockPtr();
// printf("%lf %lf\n", data_ptr[0], data_ptr[1]);
memcpy(res, data_ptr, num_samples*sizeof(double));
memcpy(res, data_ptr, num_samples * sizeof(double));
res_table->releaseBlockOfRows(block1);
return;
}

// Destructor callback for SVC models: frees the heap-allocated SharedPtr
// created by svc_train. `size` and `in` are part of the required callback
// signature and are unused here.
void dtor_svc(void* model_ptr, int64_t size, void* in)
{
    svc_payload* st = (svc_payload*)model_ptr;
    delete st->trainingResultPtr;
    return;
}

void* mnb_train(int64_t num_features, int64_t num_samples, int* X, int *y,
int64_t *n_classes_ptr)
void* mnb_train(int64_t num_features, int64_t num_samples, int* X, int* y, int64_t* n_classes_ptr)
{
int rankId, num_pes;
MPI_Comm_rank(MPI_COMM_WORLD, &rankId);
MPI_Comm_size(MPI_COMM_WORLD, &num_pes);
size_t nBlocks= num_pes;
size_t nBlocks = num_pes;

int64_t n_classes = *n_classes_ptr;
// if number of classes is not known, count label values and assign to ptr
// to update MNB data
if (n_classes==-1)
if (n_classes == -1)
n_classes = get_num_classes(y, num_samples);
*n_classes_ptr = n_classes;

// printf("mnb_train nClasses:%ld nFeatures:%ld nSamples:%ld X[0]:%ld y[0]:%ld\n",
// n_classes, num_features, num_samples, X[0], y[0]);

services::SharedPtr< HomogenNumericTable< int > > trainData =
HomogenNumericTable<int>::create(X, num_features, num_samples);
services::SharedPtr< HomogenNumericTable< int > > trainGroundTruth =
HomogenNumericTable<int>::create(y, 1, num_samples);
services::SharedPtr<HomogenNumericTable<int>> trainData =
HomogenNumericTable<int>::create(X, num_features, num_samples);
services::SharedPtr<HomogenNumericTable<int>> trainGroundTruth =
HomogenNumericTable<int>::create(y, 1, num_samples);

multinomial_naive_bayes::training::ResultPtr trainingResult;
multinomial_naive_bayes::training::Distributed<step1Local> localAlgorithm(n_classes);
Expand All @@ -191,24 +199,28 @@ void* mnb_train(int64_t num_features, int64_t num_samples, int* X, int *y,
}
{
services::SharedPtr<byte> nodeResults(new byte[perNodeArchLength]);
dataArch.copyArchiveToArray(nodeResults.get(), perNodeArchLength );

MPI_Gather(nodeResults.get(), perNodeArchLength, MPI_CHAR,
serializedData.get(), perNodeArchLength, MPI_CHAR, mpi_root,
MPI_COMM_WORLD);
dataArch.copyArchiveToArray(nodeResults.get(), perNodeArchLength);

MPI_Gather(nodeResults.get(),
perNodeArchLength,
MPI_CHAR,
serializedData.get(),
perNodeArchLength,
MPI_CHAR,
mpi_root,
MPI_COMM_WORLD);
}

if(rankId == mpi_root)
if (rankId == mpi_root)
{
multinomial_naive_bayes::training::Distributed<step2Master> masterAlgorithm(n_classes);

for(size_t i = 0; i < nBlocks ; i++)
for (size_t i = 0; i < nBlocks; i++)
{
OutputDataArchive dataArch(serializedData.get() + perNodeArchLength * i, perNodeArchLength);

multinomial_naive_bayes::training::PartialResultPtr
dataForStep2FromStep1(
new multinomial_naive_bayes::training::PartialResult());
multinomial_naive_bayes::training::PartialResultPtr dataForStep2FromStep1(
new multinomial_naive_bayes::training::PartialResult());
dataForStep2FromStep1->deserialize(dataArch);

masterAlgorithm.input.add(multinomial_naive_bayes::training::partialModels, dataForStep2FromStep1);
Expand Down Expand Up @@ -244,26 +256,25 @@ void* mnb_train(int64_t num_features, int64_t num_samples, int* X, int *y,
}

// FIXME: return pointer to SharedPtr since get/set functions don't work
services::SharedPtr<multinomial_naive_bayes::training::Result> * ptres =
services::SharedPtr<multinomial_naive_bayes::training::Result>* ptres =
new services::SharedPtr<multinomial_naive_bayes::training::Result>();
*ptres = trainingResult;
return ptres;
}

void mnb_predict(void* model_ptr, int64_t num_features, int64_t num_samples,
int* p, int *res, int64_t n_classes)
void mnb_predict(void* model_ptr, int64_t num_features, int64_t num_samples, int* p, int* res, int64_t n_classes)
{
// printf("predict mnb classes: %lld\n", n_classes);
services::SharedPtr<multinomial_naive_bayes::training::Result>* trainingResult =
(services::SharedPtr<multinomial_naive_bayes::training::Result>*)(model_ptr);
services::SharedPtr<classifier::prediction::Result> predictionResult;

services::SharedPtr< HomogenNumericTable< int > > testData =
HomogenNumericTable<int>::create(p, num_features, num_samples);
services::SharedPtr<HomogenNumericTable<int>> testData =
HomogenNumericTable<int>::create(p, num_features, num_samples);

multinomial_naive_bayes::prediction::Batch<> algorithm(n_classes);

algorithm.input.set(classifier::prediction::data, testData);
algorithm.input.set(classifier::prediction::data, testData);
algorithm.input.set(classifier::prediction::model, (*trainingResult)->get(classifier::training::model));

algorithm.compute();
Expand All @@ -272,15 +283,15 @@ void mnb_predict(void* model_ptr, int64_t num_features, int64_t num_samples,
NumericTablePtr res_table = predictionResult->get(classifier::prediction::prediction);
BlockDescriptor<int> block1;
res_table->getBlockOfRows(0, num_samples, readOnly, block1);
int *data_ptr = block1.getBlockPtr();
int* data_ptr = block1.getBlockPtr();
// printf("%lf %lf\n", data_ptr[0], data_ptr[1]);
memcpy(res, data_ptr, num_samples*sizeof(int));
memcpy(res, data_ptr, num_samples * sizeof(int));
res_table->releaseBlockOfRows(block1);
return;
}
// Destructor callback for naive Bayes models: frees the heap-allocated
// SharedPtr created by mnb_train. `size` and `in` are part of the required
// callback signature and are unused here.
void dtor_mnb(void* model_ptr, int64_t size, void* in)
{
    mnb_payload* st = (mnb_payload*)model_ptr;
    delete st->trainingResultPtr;
    return;
}
30 changes: 17 additions & 13 deletions hpat/_distributed.cpp
Original file line number Diff line number Diff line change
@@ -1,23 +1,27 @@
#include "_distributed.h"

// Module init for "hdist": creates the module and publishes the distributed
// helper entry points as integer attributes (function pointers boxed with
// PyLong_FromVoidPtr) so Numba-compiled code can call them directly.
PyMODINIT_FUNC PyInit_hdist(void)
{
    PyObject* m;
    static struct PyModuleDef moduledef = {
        PyModuleDef_HEAD_INIT,
        "hdist",
        "No docs",
        -1,
        NULL,
    };

    m = PyModule_Create(&moduledef);
    if (m == NULL)
    {
        return NULL;
    }

    PyObject_SetAttrString(m, "hpat_dist_get_start", PyLong_FromVoidPtr((void*)(&hpat_dist_get_start)));
    PyObject_SetAttrString(m, "hpat_dist_get_end", PyLong_FromVoidPtr((void*)(&hpat_dist_get_end)));
    PyObject_SetAttrString(m, "hpat_dist_get_node_portion", PyLong_FromVoidPtr((void*)(&hpat_dist_get_node_portion)));
    PyObject_SetAttrString(m, "hpat_dist_get_item_pointer", PyLong_FromVoidPtr((void*)(&hpat_dist_get_item_pointer)));
    PyObject_SetAttrString(m, "hpat_get_dummy_ptr", PyLong_FromVoidPtr((void*)(&hpat_get_dummy_ptr)));

    return m;
}
Loading

0 comments on commit af8b6e8

Please sign in to comment.