Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 2 additions & 5 deletions buildscripts/hpat-conda-recipe/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,24 +21,21 @@ requirements:
- pandas
- mpich # [not win]
- pyarrow 0.9.*
- boost
- boost-cpp
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the difference between boost and boost-cpp?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

boost is a metapackage from Anaconda, which depends on libboost and boost-py (or so). The conda-forge naming is different. The arrow packages we use come from conda-forge and depend on the conda-forge boost packages. Anaconda is behind on the version, so we cannot mix the two.

- hdf5
- h5py
- daal-devel
- opencv 3.3.1
- hpat-parquet-reader # [not win]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need hpat-parquet-reader for building with the default Anaconda environment.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why?


run:
- python 3.6.*
- numpy 1.14.*
- pandas
- mpich # [not win]
- pyarrow 0.9.*
- boost
- boost-cpp
- numba 0.38.*
- hdf5
- h5py
- hpat-parquet-reader # [not win]

test:
imports:
Expand Down
2 changes: 1 addition & 1 deletion hpat/_parquet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

// just include parquet reader on Windows since the GCC ABI change issue
// doesn't exist, and VC linker removes unused lib symbols
#ifdef _MSC_VER
#if defined(_MSC_VER) || defined(BUILTIN_PARQUET_READER)
#include <parquet_reader/hpat_parquet_reader.cpp>
#else

Expand Down
10 changes: 6 additions & 4 deletions hpat/_str_ext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,15 @@
#include <vector>


#ifdef USE_BOOST_REGEX
#include <boost/regex.hpp>
using boost::regex;
using boost::regex_search;
#else
#include <regex>
using std::regex;
using std::regex_search;

// #include <boost/regex.hpp>
// using boost::regex;
// using boost::regex_search;
#endif

#ifndef _WIN32
#include <glob.h>
Expand Down
8 changes: 8 additions & 0 deletions hpat/tests/test_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,14 @@ def test_impl(a):
# XXX: use startswith since hpat output can have extra characters
self.assertTrue(h_res.startswith(py_res))

def test_regex(self):
    """Check jitted regex search against one matching and one non-matching pattern."""
    def test_impl(_str, _pat):
        return hpat.str_ext.contains_regex(_str, hpat.str_ext.compile_regex(_pat))

    jitted = hpat.jit(test_impl)
    text = 'What does the fox say'
    # (pattern, expected result) pairs, evaluated in order.
    cases = (
        (r'd.*(the |fox ){2}', True),
        (r'[kz]u*', False),
    )
    for pattern, expected in cases:
        self.assertEqual(jitted(text, pattern), expected)


# string array tests
def test_string_array_constructor(self):
# create StringArray and return as list of strings
Expand Down
2 changes: 1 addition & 1 deletion parquet_reader/hpat_parquet_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ void pq_init_reader(const char* file_name,

// parquet type sizes (NOT arrow)
// boolean, int32, int64, int96, float, double
int pq_type_sizes[] = {1, 4, 8, 12, 4, 8};
static int pq_type_sizes[] = {1, 4, 8, 12, 4, 8};


int64_t pq_get_size_single_file(const char* file_name, int64_t column_idx)
Expand Down
164 changes: 90 additions & 74 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,51 +74,66 @@ def readme():
if 'HPAT_XE_SUPPORT' in os.environ and os.environ['HPAT_XE_SUPPORT'] != "0":
_has_xenon = True

ind = [PREFIX_DIR+'/include',]
lid = [PREFIX_DIR+'/lib',]
eca = ['-std=c++11',] # '-g', '-O0']
ela = ['-std=c++11',]

MPI_LIBS = ['mpi']
H5_COMPILE_FLAGS = []
H5_CPP_FLAGS = []
if is_win:
# use Intel MPI on Windows
MPI_LIBS = ['impi', 'impicxx']
# hdf5-parallel Windows build uses CMake which needs this flag
H5_COMPILE_FLAGS = ['-DH5_BUILT_AS_DYNAMIC_LIB']
H5_CPP_FLAGS = [('H5_BUILT_AS_DYNAMIC_LIB', None)]


ext_io = Extension(name="hio",
libraries = ['hdf5'] + MPI_LIBS + ['boost_filesystem'],
include_dirs = [HDF5_DIR+'/include/', PREFIX_DIR+'/include/'],
library_dirs = [HDF5_DIR+'/lib/' + PREFIX_DIR+'/lib/'],
extra_compile_args = H5_COMPILE_FLAGS,
sources=["hpat/_io.cpp"]
)
sources=["hpat/_io.cpp"],
libraries = ['hdf5'] + MPI_LIBS + ['boost_filesystem'],
include_dirs = [HDF5_DIR+'/include',] + ind,
library_dirs = [HDF5_DIR+'/lib',] + lid,
define_macros = H5_CPP_FLAGS,
extra_compile_args = eca,
extra_link_args = ela,
)

ext_hdist = Extension(name="hdist",
libraries = MPI_LIBS,
sources=["hpat/_distributed.cpp"],
include_dirs=[PREFIX_DIR+'/include/'],
extra_compile_args=['-std=c++11'],
extra_link_args=['-std=c++11'],
)
sources=["hpat/_distributed.cpp"],
libraries = MPI_LIBS,
extra_compile_args = eca,
extra_link_args = ela,
include_dirs = ind,
library_dirs = lid,
)

ext_chiframes = Extension(name="chiframes",
libraries = MPI_LIBS,
sources=["hpat/_hiframes.cpp"],
depends=["hpat/_hpat_sort.h"],
include_dirs=[PREFIX_DIR+'/include/'],
)
sources=["hpat/_hiframes.cpp"],
libraries = MPI_LIBS,
depends=["hpat/_hpat_sort.h"],
extra_compile_args = eca,
extra_link_args = ela,
include_dirs = ind,
library_dirs = lid,
)


ext_dict = Extension(name="hdict_ext",
sources=["hpat/_dict_ext.cpp"]
)
sources=["hpat/_dict_ext.cpp"],
extra_compile_args = eca,
extra_link_args = ela,
include_dirs = ind,
library_dirs = lid,
)

ext_str = Extension(name="hstr_ext",
sources=["hpat/_str_ext.cpp"],
#include_dirs=[PREFIX_DIR+'/include/'],
#libraries=['boost_regex'],
extra_compile_args=['-std=c++11'],
extra_link_args=['-std=c++11'],
**np_compile_args,
#language="c++"
libraries=['boost_regex'] + np_compile_args['libraries'],
define_macros = np_compile_args['define_macros'] + [('USE_BOOST_REGEX', None)],
extra_compile_args = eca,
extra_link_args = ela,
include_dirs = np_compile_args['include_dirs'] + ind,
library_dirs = np_compile_args['library_dirs'] + lid,
)

#dt_args = copy.copy(np_compile_args)
Expand All @@ -135,71 +150,73 @@ def readme():
#)

ext_quantile = Extension(name="quantile_alg",
libraries = MPI_LIBS,
sources=["hpat/_quantile_alg.cpp"],
include_dirs=[PREFIX_DIR+'/include/'],
extra_compile_args=['-std=c++11'],
extra_link_args=['-std=c++11'],
)
sources=["hpat/_quantile_alg.cpp"],
libraries = MPI_LIBS,
extra_compile_args = eca,
extra_link_args = ela,
include_dirs = ind,
library_dirs = lid,
)

pq_libs = MPI_LIBS + ['boost_filesystem']

if is_win:
pq_libs += ['arrow', 'parquet']
else:
# separate parquet reader used due to ABI incompatibility of arrow
pq_libs += ['hpat_parquet_reader']
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we remove hpat_parquet_reader as a separate package, we can't build HPAT with the default Anaconda environment anymore. Can we support both default Anaconda and Intel builds, maybe with `#if` macros?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What happens with anaconda?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Anaconda is not compatible with conda-forge (which we use for parquet/arrow) because of compiler version differences (gcc 7.2 vs 4.8). hpat_parquet_reader provides a C interface (without std::string) to avoid the issue.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess the bigger problem is boost, which we use from conda-forge.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's easier to work around the boost problems.

pq_libs = MPI_LIBS + ['boost_filesystem', 'arrow', 'parquet']

ext_parquet = Extension(name="parquet_cpp",
libraries = pq_libs,
sources=["hpat/_parquet.cpp"],
include_dirs=[PREFIX_DIR+'/include/', '.'],
library_dirs = [PREFIX_DIR+'/lib/'],
extra_compile_args=['-std=c++11'],
extra_link_args=['-std=c++11'],
)

ext_daal_wrapper = Extension(name="daal_wrapper",
include_dirs = [DAALROOT+'/include'],
libraries = ['daal_core', 'daal_thread']+MPI_LIBS,
sources=["hpat/_daal.cpp"]
)
sources=["hpat/_parquet.cpp"],
libraries = pq_libs,
include_dirs = ['.'] + ind,
define_macros = [('BUILTIN_PARQUET_READER', None)],
extra_compile_args = eca,
extra_link_args = ela,
library_dirs = lid,
)

#ext_daal_wrapper = Extension(name="daal_wrapper",
# include_dirs = [DAALROOT+'/include'],
# libraries = ['daal_core', 'daal_thread']+MPI_LIBS,
# sources=["hpat/_daal.cpp"]
# )

ext_ros = Extension(name="ros_cpp",
include_dirs = ['/opt/ros/lunar/include', '/opt/ros/lunar/include/xmlrpcpp', PREFIX_DIR+'/include/', './ros_include'],
extra_link_args='-rdynamic /opt/ros/lunar/lib/librosbag.so /opt/ros/lunar/lib/librosbag_storage.so -lboost_program_options /opt/ros/lunar/lib/libroslz4.so /opt/ros/lunar/lib/libtopic_tools.so /opt/ros/lunar/lib/libroscpp.so -lboost_filesystem -lboost_signals /opt/ros/lunar/lib/librosconsole.so /opt/ros/lunar/lib/librosconsole_log4cxx.so /opt/ros/lunar/lib/librosconsole_backend_interface.so -lboost_regex /opt/ros/lunar/lib/libroscpp_serialization.so /opt/ros/lunar/lib/librostime.so /opt/ros/lunar/lib/libxmlrpcpp.so /opt/ros/lunar/lib/libcpp_common.so -lboost_system -lboost_thread -lboost_chrono -lboost_date_time -lboost_atomic -lpthread -Wl,-rpath,/opt/ros/lunar/lib'.split(),
sources=["hpat/_ros.cpp"]
)
sources=["hpat/_ros.cpp"],
include_dirs = ['/opt/ros/lunar/include', '/opt/ros/lunar/include/xmlrpcpp', PREFIX_DIR+'/include/', './ros_include'],
extra_compile_args = eca,
extra_link_args = ela + '-rdynamic /opt/ros/lunar/lib/librosbag.so /opt/ros/lunar/lib/librosbag_storage.so -lboost_program_options /opt/ros/lunar/lib/libroslz4.so /opt/ros/lunar/lib/libtopic_tools.so /opt/ros/lunar/lib/libroscpp.so -lboost_filesystem -lboost_signals /opt/ros/lunar/lib/librosconsole.so /opt/ros/lunar/lib/librosconsole_log4cxx.so /opt/ros/lunar/lib/librosconsole_backend_interface.so -lboost_regex /opt/ros/lunar/lib/libroscpp_serialization.so /opt/ros/lunar/lib/librostime.so /opt/ros/lunar/lib/libxmlrpcpp.so /opt/ros/lunar/lib/libcpp_common.so -lboost_system -lboost_thread -lboost_chrono -lboost_date_time -lboost_atomic -lpthread -Wl,-rpath,/opt/ros/lunar/lib'.split(),
library_dirs = lid,
)

cv_libs = ['opencv_core', 'opencv_imgproc', 'opencv_imgcodecs', 'opencv_highgui']
# XXX cv lib file name needs version on Windows
if is_win:
cv_libs = [l+'331' for l in cv_libs]

ext_cv_wrapper = Extension(name="cv_wrapper",
include_dirs = [OPENCV_DIR+'/include'],
library_dirs = [os.path.join(OPENCV_DIR,'lib')],
libraries = cv_libs,
#extra_link_args = cv_link_args,
sources=["hpat/_cv.cpp"],
language="c++",
)
sources=["hpat/_cv.cpp"],
include_dirs = [OPENCV_DIR+'/include'] + ind,
library_dirs = [os.path.join(OPENCV_DIR,'lib')] + lid,
libraries = cv_libs,
#extra_link_args = cv_link_args,
language="c++",
)

ext_xenon_wrapper = Extension(name="hxe_ext",
#include_dirs = ['/usr/include'],
include_dirs = ['.'],
library_dirs = ['.'],
libraries = ['xe'],
sources=["hpat/_xe_wrapper.cpp"]
)
sources=["hpat/_xe_wrapper.cpp"],
#include_dirs = ['/usr/include'],
include_dirs = ['.'] + ind,
library_dirs = ['.'] + lid,
libraries = ['xe'],
extra_compile_args = eca,
extra_link_args = ela,
)

_ext_mods = [ext_hdist, ext_chiframes, ext_dict, ext_str, ext_quantile]

if _has_h5py:
_ext_mods.append(ext_io)
if _has_pyarrow:
_ext_mods.append(ext_parquet)
if _has_daal:
_ext_mods.append(ext_daal_wrapper)
#if _has_daal:
# _ext_mods.append(ext_daal_wrapper)
if _has_ros:
_ext_mods.append(ext_ros)
if _has_opencv:
Expand All @@ -209,7 +226,7 @@ def readme():
_ext_mods.append(ext_xenon_wrapper)

setup(name='hpat',
version='0.2.0',
version='0.3.0',
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should figure out our versioning policy.

description='compiling Python code for clusters',
long_description=readme(),
classifiers=[
Expand All @@ -223,8 +240,7 @@ def readme():
],
keywords='data analytics cluster',
url='https://github.com/IntelLabs/hpat',
author='Ehsan Totoni',
author_email='ehsan.totoni@intel.com',
author='Intel',
packages=['hpat'],
install_requires=['numba'],
extras_require={'HDF5': ["h5py"], 'Parquet': ["pyarrow"]},
Expand Down