diff --git a/buildscripts/hpat-conda-recipe/meta.yaml b/buildscripts/hpat-conda-recipe/meta.yaml
index 4e6a122eb..8b9998331 100644
--- a/buildscripts/hpat-conda-recipe/meta.yaml
+++ b/buildscripts/hpat-conda-recipe/meta.yaml
@@ -21,12 +21,10 @@ requirements:
     - pandas
     - mpich # [not win]
     - pyarrow 0.9.*
-    - boost
+    - boost-cpp
     - hdf5
     - h5py
-    - daal-devel
     - opencv 3.3.1
-    - hpat-parquet-reader # [not win]

   run:
     - python 3.6.*
@@ -34,11 +32,10 @@ requirements:
     - pandas
     - mpich # [not win]
     - pyarrow 0.9.*
-    - boost
+    - boost-cpp
     - numba 0.38.*
     - hdf5
     - h5py
-    - hpat-parquet-reader # [not win]

 test:
   imports:
diff --git a/hpat/_parquet.cpp b/hpat/_parquet.cpp
index 5453307e2..3191c2e44 100644
--- a/hpat/_parquet.cpp
+++ b/hpat/_parquet.cpp
@@ -9,7 +9,7 @@

 // just include parquet reader on Windows since the GCC ABI change issue
 // doesn't exist, and VC linker removes unused lib symbols
-#ifdef _MSC_VER
+#if defined(_MSC_VER) || defined(BUILTIN_PARQUET_READER)
 #include
 #else

diff --git a/hpat/_str_ext.cpp b/hpat/_str_ext.cpp
index 1caba4006..0bfc90d7c 100644
--- a/hpat/_str_ext.cpp
+++ b/hpat/_str_ext.cpp
@@ -6,13 +6,15 @@
 #include

+#ifdef USE_BOOST_REGEX
+#include <boost/regex.hpp>
+using boost::regex;
+using boost::regex_search;
+#else
 #include <regex>
 using std::regex;
 using std::regex_search;
-
-// #include <boost/regex.hpp>
-// using boost::regex;
-// using boost::regex_search;
+#endif

 #ifndef _WIN32
 #include

diff --git a/hpat/tests/test_strings.py b/hpat/tests/test_strings.py
index 6060fd2a8..b2cd07e72 100644
--- a/hpat/tests/test_strings.py
+++ b/hpat/tests/test_strings.py
@@ -79,6 +79,14 @@ def test_impl(a):
         # XXX: use startswith since hpat output can have extra characters
         self.assertTrue(h_res.startswith(py_res))

+    def test_regex(self):
+        def test_impl(_str, _pat):
+            return hpat.str_ext.contains_regex(_str, hpat.str_ext.compile_regex(_pat))
+        hpat_func = hpat.jit(test_impl)
+        self.assertEqual(hpat_func('What does the fox say', r'd.*(the |fox ){2}'), True)
+        self.assertEqual(hpat_func('What does the fox say', r'[kz]u*'), False)
+
+
     # string array tests
     def test_string_array_constructor(self):
         # create StringArray and return as list of strings
diff --git a/parquet_reader/hpat_parquet_reader.cpp b/parquet_reader/hpat_parquet_reader.cpp
index 70b8311e2..fb1d8cb8c 100644
--- a/parquet_reader/hpat_parquet_reader.cpp
+++ b/parquet_reader/hpat_parquet_reader.cpp
@@ -42,7 +42,7 @@ void pq_init_reader(const char* file_name,

 // parquet type sizes (NOT arrow)
 // boolean, int32, int64, int96, float, double
-int pq_type_sizes[] = {1, 4, 8, 12, 4, 8};
+static int pq_type_sizes[] = {1, 4, 8, 12, 4, 8};


 int64_t pq_get_size_single_file(const char* file_name, int64_t column_idx)
diff --git a/setup.py b/setup.py
index d8fd76655..e6b27eef4 100644
--- a/setup.py
+++ b/setup.py
@@ -74,51 +74,66 @@ def readme():
 if 'HPAT_XE_SUPPORT' in os.environ and os.environ['HPAT_XE_SUPPORT'] != "0":
     _has_xenon = True

+ind = [PREFIX_DIR+'/include',]
+lid = [PREFIX_DIR+'/lib',]
+eca = ['-std=c++11',] # '-g', '-O0']
+ela = ['-std=c++11',]
+
 MPI_LIBS = ['mpi']
-H5_COMPILE_FLAGS = []
+H5_CPP_FLAGS = []
 if is_win:
     # use Intel MPI on Windows
     MPI_LIBS = ['impi', 'impicxx']
     # hdf5-parallel Windows build uses CMake which needs this flag
-    H5_COMPILE_FLAGS = ['-DH5_BUILT_AS_DYNAMIC_LIB']
+    H5_CPP_FLAGS = [('H5_BUILT_AS_DYNAMIC_LIB', None)]


 ext_io = Extension(name="hio",
-                   libraries = ['hdf5'] + MPI_LIBS + ['boost_filesystem'],
-                   include_dirs = [HDF5_DIR+'/include/', PREFIX_DIR+'/include/'],
-                   library_dirs = [HDF5_DIR+'/lib/' + PREFIX_DIR+'/lib/'],
-                   extra_compile_args = H5_COMPILE_FLAGS,
-                   sources=["hpat/_io.cpp"]
-                   )
+                   sources=["hpat/_io.cpp"],
+                   libraries = ['hdf5'] + MPI_LIBS + ['boost_filesystem'],
+                   include_dirs = [HDF5_DIR+'/include',] + ind,
+                   library_dirs = [HDF5_DIR+'/lib',] + lid,
+                   define_macros = H5_CPP_FLAGS,
+                   extra_compile_args = eca,
+                   extra_link_args = ela,
+)

 ext_hdist = Extension(name="hdist",
-                      libraries = MPI_LIBS,
-                      sources=["hpat/_distributed.cpp"],
-                      include_dirs=[PREFIX_DIR+'/include/'],
-                      extra_compile_args=['-std=c++11'],
-                      extra_link_args=['-std=c++11'],
-                      )
+                      sources=["hpat/_distributed.cpp"],
+                      libraries = MPI_LIBS,
+                      extra_compile_args = eca,
+                      extra_link_args = ela,
+                      include_dirs = ind,
+                      library_dirs = lid,
+)

 ext_chiframes = Extension(name="chiframes",
-                          libraries = MPI_LIBS,
-                          sources=["hpat/_hiframes.cpp"],
-                          depends=["hpat/_hpat_sort.h"],
-                          include_dirs=[PREFIX_DIR+'/include/'],
-                          )
+                          sources=["hpat/_hiframes.cpp"],
+                          libraries = MPI_LIBS,
+                          depends=["hpat/_hpat_sort.h"],
+                          extra_compile_args = eca,
+                          extra_link_args = ela,
+                          include_dirs = ind,
+                          library_dirs = lid,
+)

 ext_dict = Extension(name="hdict_ext",
-                     sources=["hpat/_dict_ext.cpp"]
-                     )
+                     sources=["hpat/_dict_ext.cpp"],
+                     extra_compile_args = eca,
+                     extra_link_args = ela,
+                     include_dirs = ind,
+                     library_dirs = lid,
+)

 ext_str = Extension(name="hstr_ext",
                     sources=["hpat/_str_ext.cpp"],
-                    #include_dirs=[PREFIX_DIR+'/include/'],
-                    #libraries=['boost_regex'],
-                    extra_compile_args=['-std=c++11'],
-                    extra_link_args=['-std=c++11'],
-                    **np_compile_args,
-                    #language="c++"
+                    libraries=['boost_regex'] + np_compile_args['libraries'],
+                    define_macros = np_compile_args['define_macros'] + [('USE_BOOST_REGEX', None)],
+                    extra_compile_args = eca,
+                    extra_link_args = ela,
+                    include_dirs = np_compile_args['include_dirs'] + ind,
+                    library_dirs = np_compile_args['library_dirs'] + lid,
                     )


 #dt_args = copy.copy(np_compile_args)
@@ -135,41 +150,40 @@ def readme():
 #)

 ext_quantile = Extension(name="quantile_alg",
-                         libraries = MPI_LIBS,
-                         sources=["hpat/_quantile_alg.cpp"],
-                         include_dirs=[PREFIX_DIR+'/include/'],
-                         extra_compile_args=['-std=c++11'],
-                         extra_link_args=['-std=c++11'],
-                         )
+                         sources=["hpat/_quantile_alg.cpp"],
+                         libraries = MPI_LIBS,
+                         extra_compile_args = eca,
+                         extra_link_args = ela,
+                         include_dirs = ind,
+                         library_dirs = lid,
+)

-pq_libs = MPI_LIBS + ['boost_filesystem']
-if is_win:
-    pq_libs += ['arrow', 'parquet']
-else:
-    # seperate parquet reader used due to ABI incompatibility of arrow
-    pq_libs += ['hpat_parquet_reader']
+pq_libs = MPI_LIBS + ['boost_filesystem', 'arrow', 'parquet']

 ext_parquet = Extension(name="parquet_cpp",
-                        libraries = pq_libs,
-                        sources=["hpat/_parquet.cpp"],
-                        include_dirs=[PREFIX_DIR+'/include/', '.'],
-                        library_dirs = [PREFIX_DIR+'/lib/'],
-                        extra_compile_args=['-std=c++11'],
-                        extra_link_args=['-std=c++11'],
-                        )
-
-ext_daal_wrapper = Extension(name="daal_wrapper",
-                             include_dirs = [DAALROOT+'/include'],
-                             libraries = ['daal_core', 'daal_thread']+MPI_LIBS,
-                             sources=["hpat/_daal.cpp"]
-                             )
+                        sources=["hpat/_parquet.cpp"],
+                        libraries = pq_libs,
+                        include_dirs = ['.'] + ind,
+                        define_macros = [('BUILTIN_PARQUET_READER', None)],
+                        extra_compile_args = eca,
+                        extra_link_args = ela,
+                        library_dirs = lid,
+)
+
+#ext_daal_wrapper = Extension(name="daal_wrapper",
+#                             include_dirs = [DAALROOT+'/include'],
+#                             libraries = ['daal_core', 'daal_thread']+MPI_LIBS,
+#                             sources=["hpat/_daal.cpp"]
+#                             )


 ext_ros = Extension(name="ros_cpp",
-                    include_dirs = ['/opt/ros/lunar/include', '/opt/ros/lunar/include/xmlrpcpp', PREFIX_DIR+'/include/', './ros_include'],
-                    extra_link_args='-rdynamic /opt/ros/lunar/lib/librosbag.so /opt/ros/lunar/lib/librosbag_storage.so -lboost_program_options /opt/ros/lunar/lib/libroslz4.so /opt/ros/lunar/lib/libtopic_tools.so /opt/ros/lunar/lib/libroscpp.so -lboost_filesystem -lboost_signals /opt/ros/lunar/lib/librosconsole.so /opt/ros/lunar/lib/librosconsole_log4cxx.so /opt/ros/lunar/lib/librosconsole_backend_interface.so -lboost_regex /opt/ros/lunar/lib/libroscpp_serialization.so /opt/ros/lunar/lib/librostime.so /opt/ros/lunar/lib/libxmlrpcpp.so /opt/ros/lunar/lib/libcpp_common.so -lboost_system -lboost_thread -lboost_chrono -lboost_date_time -lboost_atomic -lpthread -Wl,-rpath,/opt/ros/lunar/lib'.split(),
-                    sources=["hpat/_ros.cpp"]
-                    )
+                    sources=["hpat/_ros.cpp"],
+                    include_dirs = ['/opt/ros/lunar/include', '/opt/ros/lunar/include/xmlrpcpp', PREFIX_DIR+'/include/', './ros_include'],
+                    extra_compile_args = eca,
+                    extra_link_args = ela + '-rdynamic /opt/ros/lunar/lib/librosbag.so /opt/ros/lunar/lib/librosbag_storage.so -lboost_program_options /opt/ros/lunar/lib/libroslz4.so /opt/ros/lunar/lib/libtopic_tools.so /opt/ros/lunar/lib/libroscpp.so -lboost_filesystem -lboost_signals /opt/ros/lunar/lib/librosconsole.so /opt/ros/lunar/lib/librosconsole_log4cxx.so /opt/ros/lunar/lib/librosconsole_backend_interface.so -lboost_regex /opt/ros/lunar/lib/libroscpp_serialization.so /opt/ros/lunar/lib/librostime.so /opt/ros/lunar/lib/libxmlrpcpp.so /opt/ros/lunar/lib/libcpp_common.so -lboost_system -lboost_thread -lboost_chrono -lboost_date_time -lboost_atomic -lpthread -Wl,-rpath,/opt/ros/lunar/lib'.split(),
+                    library_dirs = lid,
+)

 cv_libs = ['opencv_core', 'opencv_imgproc', 'opencv_imgcodecs', 'opencv_highgui']
 # XXX cv lib file name needs version on Windows
@@ -177,20 +191,23 @@ def readme():
     cv_libs = [l+'331' for l in cv_libs]

 ext_cv_wrapper = Extension(name="cv_wrapper",
-                           include_dirs = [OPENCV_DIR+'/include'],
-                           library_dirs = [os.path.join(OPENCV_DIR,'lib')],
-                           libraries = cv_libs,
-                           #extra_link_args = cv_link_args,
-                           sources=["hpat/_cv.cpp"],
-                           language="c++",
-                           )
+                           sources=["hpat/_cv.cpp"],
+                           include_dirs = [OPENCV_DIR+'/include'] + ind,
+                           library_dirs = [os.path.join(OPENCV_DIR,'lib')] + lid,
+                           libraries = cv_libs,
+                           #extra_link_args = cv_link_args,
+                           language="c++",
+)
+

 ext_xenon_wrapper = Extension(name="hxe_ext",
-                              #include_dirs = ['/usr/include'],
-                              include_dirs = ['.'],
-                              library_dirs = ['.'],
-                              libraries = ['xe'],
-                              sources=["hpat/_xe_wrapper.cpp"]
-                              )
+                              sources=["hpat/_xe_wrapper.cpp"],
+                              #include_dirs = ['/usr/include'],
+                              include_dirs = ['.'] + ind,
+                              library_dirs = ['.'] + lid,
+                              libraries = ['xe'],
+                              extra_compile_args = eca,
+                              extra_link_args = ela,
+)

 _ext_mods = [ext_hdist, ext_chiframes, ext_dict, ext_str, ext_quantile]
@@ -198,8 +215,8 @@ def readme():
     _ext_mods.append(ext_io)
 if _has_pyarrow:
     _ext_mods.append(ext_parquet)
-if _has_daal:
-    _ext_mods.append(ext_daal_wrapper)
+#if _has_daal:
+#    _ext_mods.append(ext_daal_wrapper)
 if _has_ros:
     _ext_mods.append(ext_ros)
 if _has_opencv:
@@ -209,7 +226,7 @@ def readme():
     _ext_mods.append(ext_xenon_wrapper)

 setup(name='hpat',
-      version='0.2.0',
+      version='0.3.0',
       description='compiling Python code for clusters',
       long_description=readme(),
       classifiers=[
@@ -223,8 +240,7 @@ def readme():
       ],
       keywords='data analytics cluster',
       url='https://github.com/IntelLabs/hpat',
-      author='Ehsan Totoni',
-      author_email='ehsan.totoni@intel.com',
+      author='Intel',
       packages=['hpat'],
       install_requires=['numba'],
       extras_require={'HDF5': ["h5py"], 'Parquet': ["pyarrow"]},