Skip to content

Commit

Permalink
[index] python bindings (#21)
Browse files Browse the repository at this point in the history
1st version of a python binding for index
  • Loading branch information
Hendrik Muhs committed Jan 11, 2018
1 parent 1b94e55 commit b64feaf
Show file tree
Hide file tree
Showing 11 changed files with 144 additions and 54 deletions.
14 changes: 10 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
cmake_minimum_required(VERSION 2.8.12)
project(keyvi)

# Lower-cased copy of the build type, used for case-insensitive checks below.
# The expansion is quoted so that an unset/empty CMAKE_BUILD_TYPE yields an
# empty string instead of dropping the argument and failing the configure step.
string(TOLOWER "${CMAKE_BUILD_TYPE}" LOWERCASE_CMAKE_BUILD_TYPE)

# configure C++11
if(NOT CMAKE_VERSION VERSION_LESS 3.1)
Expand All @@ -13,6 +14,7 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2")

set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -ggdb3")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3 -DNDEBUG")
set(CMAKE_CXX_FLAGS_PYTHON "${CMAKE_CXX_FLAGS_PYTHON} ${CMAKE_CXX_FLAGS_RELEASE}")
set(CMAKE_CXX_FLAGS_COVERAGE "${CMAKE_CXX_FLAGS_DEBUG} -O0 --coverage")

set(COMPILE_TEST OFF CACHE BOOL "")
Expand Down Expand Up @@ -59,19 +61,23 @@ include_directories(keyvi/3rdparty/tiny-process-library/)
include_directories(${CMAKE_BINARY_DIR}/keyvi/3rdparty/tpie)
include_directories(${CMAKE_BINARY_DIR}/keyvi/3rdparty/tiny-process-library/)


FILE(GLOB_RECURSE UNIT_TEST_SOURCES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} keyvi/tests/keyvi/*.cpp)
# Define the unit test binary only when the (lower-cased) build type does not
# match "python"; in that case the test sources are neither globbed nor built.
if(NOT LOWERCASE_CMAKE_BUILD_TYPE MATCHES python)
  file(GLOB_RECURSE UNIT_TEST_SOURCES
       RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}
       keyvi/tests/keyvi/*.cpp)

  add_executable(unit_test_all ${UNIT_TEST_SOURCES})

  target_link_libraries(unit_test_all
                        tiny-process-library
                        tpie
                        ${Boost_LIBRARIES}
                        ${ZLIB_LIBRARIES})
endif()

add_executable(keyvicompiler keyvi/bin/keyvicompiler/keyvicompiler.cpp)
add_executable(keyviinspector keyvi/bin/keyviinspector/keyviinspector.cpp)
add_executable(keyvimerger keyvi/bin/keyvimerger/keyvimerger.cpp)
add_executable(unit_test_all ${UNIT_TEST_SOURCES})

target_link_libraries(keyvicompiler tpie ${Boost_LIBRARIES} ${ZLIB_LIBRARIES})
target_link_libraries(keyviinspector tpie ${Boost_LIBRARIES} ${ZLIB_LIBRARIES})
target_link_libraries(keyvimerger tpie ${Boost_LIBRARIES} ${ZLIB_LIBRARIES})
target_link_libraries(unit_test_all tiny-process-library tpie ${Boost_LIBRARIES} ${ZLIB_LIBRARIES})

install (TARGETS keyvicompiler DESTINATION bin)
install (TARGETS keyviinspector DESTINATION bin)
install (TARGETS keyvimerger DESTINATION bin)

install (FILES $<TARGET_FILE:tpie> DESTINATION lib CONFIGURATIONS python)
install (FILES $<TARGET_FILE:tiny-process-library> DESTINATION lib CONFIGURATIONS python)
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@
//

/*
* index_writer.h
* index.h
*
* Created on: Jan 11, 2017
* Author: hendrik
*/

#ifndef KEYVI_INDEX_INDEX_WRITER_H_
#define KEYVI_INDEX_INDEX_WRITER_H_
#ifndef KEYVI_INDEX_INDEX_H_
#define KEYVI_INDEX_INDEX_H_

#include <algorithm>
#include <atomic>
Expand Down Expand Up @@ -55,11 +55,11 @@
namespace keyvi {
namespace index {

class IndexWriter final : public internal::BaseIndexReader<internal::IndexWriterWorker> {
class Index final : public internal::BaseIndexReader<internal::IndexWriterWorker> {
public:
explicit IndexWriter(const std::string& index_directory,
const std::chrono::milliseconds& flush_interval = std::chrono::milliseconds(1000))
: BaseIndexReader(index_directory, flush_interval) {
explicit Index(const std::string& index_directory,
const std::chrono::milliseconds& flush_interval = std::chrono::milliseconds(1000))
: BaseIndexReader(index_directory, flush_interval), lock_file_() {
index_directory_ = index_directory;

index_toc_file_ = index_directory_;
Expand All @@ -75,13 +75,13 @@ class IndexWriter final : public internal::BaseIndexReader<internal::IndexWriter

TRACE("locking index %s", index_lock_file.string().c_str());

std::ofstream o(index_lock_file.string(), std::ios_base::app);
lock_file_.open(index_lock_file.string(), std::ios_base::app);

index_lock_ = boost::interprocess::file_lock(index_lock_file.string().c_str());
index_lock_.lock();
}

~IndexWriter() {
~Index() {
// todo: happens too early, move into own class, destruct after worker is destructed
TRACE("Unlock Index");
try {
Expand All @@ -103,10 +103,11 @@ class IndexWriter final : public internal::BaseIndexReader<internal::IndexWriter
private:
boost::filesystem::path index_directory_;
boost::filesystem::path index_toc_file_;
std::ofstream lock_file_;
boost::interprocess::file_lock index_lock_;
};

} /* namespace index */
} /* namespace keyvi */

#endif // KEYVI_INDEX_INDEX_WRITER_H_
#endif // KEYVI_INDEX_INDEX_H_
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@
//

/*
* index_reader.h
* read_only_index.h
*
* Created on: Jan 11, 2017
* Author: hendrik
*/

#ifndef KEYVI_INDEX_INDEX_READER_H_
#define KEYVI_INDEX_INDEX_READER_H_
#ifndef KEYVI_INDEX_READ_ONLY_INDEX_H_
#define KEYVI_INDEX_READ_ONLY_INDEX_H_

#include <string>

Expand All @@ -37,18 +37,18 @@
namespace keyvi {
namespace index {

class IndexReader final : public internal::BaseIndexReader<internal::IndexReaderWorker> {
class ReadOnlyIndex final : public internal::BaseIndexReader<internal::IndexReaderWorker> {
public:
explicit IndexReader(const std::string index_directory, size_t refresh_interval = 1 /*, optional external logger*/)
explicit ReadOnlyIndex(const std::string index_directory, size_t refresh_interval = 1 /*, optional external logger*/)
: BaseIndexReader(index_directory, refresh_interval) {
Payload().StartWorkerThread();
}

~IndexReader() { Payload().StopWorkerThread(); }
~ReadOnlyIndex() { Payload().StopWorkerThread(); }

void Reload() { Payload().Reload(); }
};
} /* namespace index */
} /* namespace keyvi */

#endif // KEYVI_INDEX_INDEX_READER_H_
#endif // KEYVI_INDEX_READ_ONLY_INDEX_H_
Original file line number Diff line number Diff line change
Expand Up @@ -29,19 +29,19 @@
#include <boost/filesystem.hpp>
#include <boost/test/unit_test.hpp>

#include "index/index_writer.h"
#include "index/index.h"

namespace keyvi {
namespace index {
BOOST_AUTO_TEST_SUITE(IndexWriterTests)
BOOST_AUTO_TEST_SUITE(IndexTests)

BOOST_AUTO_TEST_CASE(basic_writer) {
using boost::filesystem::temp_directory_path;
using boost::filesystem::unique_path;

auto tmp_path = temp_directory_path();
tmp_path /= unique_path();
IndexWriter writer(tmp_path.string());
Index writer(tmp_path.string());

writer.Set("a", "{\"id\":3}");

Expand All @@ -59,7 +59,7 @@ BOOST_AUTO_TEST_CASE(bigger_feed) {

auto tmp_path = temp_directory_path();
tmp_path /= unique_path();
IndexWriter writer(tmp_path.string(), std::chrono::milliseconds(100));
Index writer(tmp_path.string(), std::chrono::milliseconds(100));

for (int i = 0; i < 10000; ++i) {
writer.Set("a", "{\"id\":" + std::to_string(i) + "}");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,12 @@
#include <boost/filesystem.hpp>
#include <boost/test/unit_test.hpp>

#include "index/index_reader.h"
#include "index/read_only_index.h"
#include "testing/index_mock.h"

namespace keyvi {
namespace index {
BOOST_AUTO_TEST_SUITE(IndexTests)
BOOST_AUTO_TEST_SUITE(ReadOnlyIndexTests)

BOOST_AUTO_TEST_CASE(loadIndex) {
testing::IndexMock index;
Expand All @@ -50,7 +50,7 @@ BOOST_AUTO_TEST_CASE(loadIndex) {

index.AddSegment(&test_data_2);

IndexReader reader(index.GetIndexFolder());
ReadOnlyIndex reader(index.GetIndexFolder());

BOOST_CHECK(reader.Contains("abc"));
BOOST_CHECK(reader.Contains("babdd"));
Expand Down
2 changes: 2 additions & 0 deletions python/MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ recursive-include autowrap_includes *.hpp
graft src
include keyvi.cpp

recursive-include keyvi/bin *.cpp
recursive-include keyvi/include/keyvi *.h

recursive-include keyvi/3rdparty *.h
Expand All @@ -16,3 +17,4 @@ recursive-include keyvi/3rdparty *.cmake.in

recursive-include keyvi/3rdparty/tpie *.md
graft keyvi/3rdparty/tpie/doc
include CMakeLists.txt
62 changes: 36 additions & 26 deletions python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@
pykeyvi_pyx = 'keyvi.pyx'
pykeyvi_cpp = 'keyvi.cpp'

# Number of parallel jobs passed to `make -j` when the C++ part is built.
# multiprocessing.cpu_count() raises NotImplementedError when the count cannot
# be determined; only that case falls back to a single job, so unrelated
# errors (and KeyboardInterrupt/SystemExit) are no longer swallowed.
try:
    cpu_count = multiprocessing.cpu_count()
except NotImplementedError:
    cpu_count = 1


def generate_pykeyvi_source():
addons = glob.glob('src/addons/*')
Expand All @@ -33,10 +38,12 @@ def generate_pykeyvi_source():
def symlink_keyvi():
    """Temporarily expose the keyvi sources and CMakeLists.txt in the current directory.

    If no ``keyvi`` entry exists in the current directory, a symlink to
    ``../keyvi`` is created and ``../CMakeLists.txt`` is copied next to it.
    The generator then yields ``(pykeyvi_source_path, keyvi_source_path)``:
    the path of the local ``keyvi`` entry and the real path of ``../keyvi``.
    Both the symlink and the copied file are removed again afterwards, even
    when the consumer raises while the generator is suspended.

    If ``keyvi`` already exists, nothing is created or removed and
    ``(None, None)`` is yielded.

    NOTE(review): the yield-then-cleanup shape suggests this is driven through
    contextlib.contextmanager; the decorator is not visible here -- confirm.
    """
    if not path.exists('keyvi'):
        os.symlink('../keyvi', 'keyvi')
        try:
            shutil.copy('../CMakeLists.txt', 'CMakeLists.txt')
            keyvi_source_path = os.path.realpath(os.path.join(os.getcwd(), "../keyvi"))
            pykeyvi_source_path = os.path.join(os.getcwd(), "keyvi")
            yield (pykeyvi_source_path, keyvi_source_path)
        finally:
            # Always undo the temporary setup, also on errors in the caller.
            os.unlink('keyvi')
            # The copy may be missing if shutil.copy itself failed.
            if path.exists('CMakeLists.txt'):
                os.remove('CMakeLists.txt')
    else:
        yield None, None

Expand All @@ -46,10 +53,9 @@ def symlink_keyvi():
autowrap_data_dir = "autowrap_includes"

dictionary_sources = path.abspath('keyvi')
tpie_build_dir = path.join(dictionary_sources, '3rdparty/tpie/build')
tpie_install_prefix = 'install'
tpie_include_dir = path.join(tpie_build_dir, tpie_install_prefix, 'include')
tpie_lib_dir = path.join(tpie_build_dir, tpie_install_prefix, 'lib')
keyvi_build_dir = path.join('keyvi-build')
keyvi_install_prefix = 'install'
keyvi_lib_dir = path.join(keyvi_build_dir, keyvi_install_prefix, 'lib')

additional_compile_flags = []

Expand All @@ -73,14 +79,16 @@ def symlink_keyvi():
]

linklibraries = [
"tiny-process-library",
"tpie",
"z"
]

mac_os_static_libs_dir = 'mac_os_static_libs'

extra_link_arguments = []
link_library_dirs = [tpie_lib_dir]
link_library_dirs = [keyvi_lib_dir]
zlib_root = None

if sys.platform == 'darwin':
additional_compile_flags.append("-DOS_MACOSX")
Expand Down Expand Up @@ -122,6 +130,7 @@ def run(self):
global linklibraries_static_or_dynamic
global extra_link_arguments
global ext_modules
global zlib_root
print ("Building in {0} mode".format(self.mode))

if self.mode == 'debug':
Expand Down Expand Up @@ -161,6 +170,7 @@ def run(self):

# custom zlib location
if self.zlib_root:
zlib_root = self.zlib_root
for ext_m in ext_modules:
include_dirs = [path.join(self.zlib_root, "include")] + getattr(ext_m, 'include_dirs')
setattr(ext_m, 'include_dirs', include_dirs)
Expand Down Expand Up @@ -214,37 +224,37 @@ def run(self):
dst_file = path.join(mac_os_static_libs_dir, lib_file_name)
shutil.copyfile(src_file, dst_file)

if not path.exists(path.join(tpie_lib_dir, 'libtpie.a')):
try:
cpu_count = multiprocessing.cpu_count()
except:
cpu_count = 1

CMAKE_CXX_FLAGS = '-fPIC -std=c++11'
if sys.platform == 'darwin':
CMAKE_CXX_FLAGS += ' -mmacosx-version-min=10.9'

tpie_build_cmd = 'mkdir -p {}'.format(tpie_build_dir)
tpie_build_cmd += ' && cd {}'.format(tpie_build_dir)
tpie_build_cmd += ' && cmake -D CMAKE_BUILD_TYPE:STRING=Release ' \
' -D TPIE_PARALLEL_SORT=1 -D COMPILE_TEST=OFF -D CMAKE_CXX_FLAGS="{CXX_FLAGS}"' \
' -D CMAKE_INSTALL_PREFIX={INSTALL_PREFIX} ..'.format(
CXX_FLAGS=CMAKE_CXX_FLAGS, INSTALL_PREFIX=tpie_install_prefix)
tpie_build_cmd += ' && make -j {}'.format(cpu_count)
tpie_build_cmd += ' && make install'

subprocess.call(tpie_build_cmd, shell=True)
CMAKE_CXX_FLAGS = '-fPIC -std=c++11'
if sys.platform == 'darwin':
CMAKE_CXX_FLAGS += ' -mmacosx-version-min=10.9'

keyvi_build_cmd = 'mkdir -p {}'.format(keyvi_build_dir)
keyvi_build_cmd += ' && cd {}'.format(keyvi_build_dir)
keyvi_build_cmd += ' && cmake -D CMAKE_BUILD_TYPE:STRING=python ' \
' -D CMAKE_CXX_FLAGS="{CXX_FLAGS}"' \
' -D CMAKE_INSTALL_PREFIX={INSTALL_PREFIX}'.format(
CXX_FLAGS=CMAKE_CXX_FLAGS, INSTALL_PREFIX=keyvi_install_prefix)
if zlib_root is not None:
keyvi_build_cmd += ' -D ZLIB_ROOT={ZLIB_ROOT}'.format(ZLIB_ROOT=zlib_root)
keyvi_build_cmd += ' ..'
keyvi_build_cmd += ' && make -j {}'.format(cpu_count)
keyvi_build_cmd += ' && make install'

print ("Building keyvi C++ part: " + keyvi_build_cmd)
subprocess.call(keyvi_build_cmd, shell=True)

os.environ['ARCHFLAGS'] = '-arch x86_64'
_build_ext.build_ext.run(self)


ext_modules = [Extension('keyvi',
include_dirs=[autowrap_data_dir,
tpie_include_dir,
path.join(dictionary_sources, '3rdparty/tpie'),
path.join(os.path.join(keyvi_build_dir,'keyvi/3rdparty/tpie')),
path.join(dictionary_sources, 'include/keyvi'),
path.join(dictionary_sources, '3rdparty/rapidjson/include'),
path.join(dictionary_sources, '3rdparty/msgpack-c/include'),
path.join(dictionary_sources, '3rdparty/tiny-process-library'),
path.join(dictionary_sources, '3rdparty/utf8'),
path.join(dictionary_sources, '3rdparty/misc'),
path.join(dictionary_sources, '3rdparty/xchange/src')],
Expand Down
36 changes: 36 additions & 0 deletions python/src/addons/ReadOnlyIndex.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@


def get (self, key, default = None):
    """Look up ``key`` and return its Match, or ``default`` if the result is empty.

    A unicode key is encoded to UTF-8 first; after that the key has to be a
    bytes object.
    """
    cdef shared_ptr[_Match] match_ptr
    cdef Match wrapped

    if isinstance(key, unicode):
        key = key.encode('utf-8')
    assert isinstance(key, bytes), 'arg in_0 wrong type'

    # operator[] on the wrapped C++ object; the result is copied into a shared_ptr
    match_ptr = shared_ptr[_Match](new _Match(deref(self.inst.get())[(<libcpp_string>key)]))
    if match_ptr.get().IsEmpty():
        return default

    wrapped = Match.__new__(Match)
    wrapped.inst = match_ptr
    return wrapped

def __contains__(self, key):
    """Return the result of the wrapped object's ``Contains`` for ``key``.

    A unicode key is encoded to UTF-8 first; after that the key has to be a
    bytes object.
    """
    encoded = key.encode('utf-8') if isinstance(key, unicode) else key

    assert isinstance(encoded, bytes), 'arg in_0 wrong type'

    return self.inst.get().Contains(encoded)

def __getitem__ (self, key):
    """Look up ``key`` and return its Match; raise ``KeyError`` if the result is empty.

    A unicode key is encoded to UTF-8 first; after that the key has to be a
    bytes object.
    """
    cdef shared_ptr[_Match] match_ptr
    cdef Match wrapped

    if isinstance(key, unicode):
        key = key.encode('utf-8')

    assert isinstance(key, bytes), 'arg in_0 wrong type'

    # operator[] on the wrapped C++ object; the result is copied into a shared_ptr
    match_ptr = shared_ptr[_Match](new _Match(deref(self.inst.get())[(<libcpp_string>key)]))
    if match_ptr.get().IsEmpty():
        raise KeyError(key)

    wrapped = Match.__new__(Match)
    wrapped.inst = match_ptr
    return wrapped
7 changes: 7 additions & 0 deletions python/src/pxds/index.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from libcpp.string cimport string as libcpp_utf8_string

# Cython declaration of the C++ class keyvi::index::Index from "index/index.h".
# Every member is declared with `except+` so that a C++ exception thrown by the
# call is converted into a Python exception instead of escaping untranslated;
# this now includes Flush(), matching the constructor and Set().
cdef extern from "index/index.h" namespace "keyvi::index":
    cdef cppclass Index:
        Index(libcpp_utf8_string) except+
        void Set(libcpp_utf8_string, libcpp_utf8_string) except+
        void Flush() except+
Loading

0 comments on commit b64feaf

Please sign in to comment.