From cfaf13e9a09aa3b8bb0bd76b5a92aa0923544a32 Mon Sep 17 00:00:00 2001 From: Sami Virpioja Date: Wed, 11 Oct 2023 13:35:57 +0300 Subject: [PATCH] Support for Python 3.11 builds (#3) * apply fix from https://github.com/Helsinki-NLP/opus-fast-mosestokenizer/pull/2 * use newer pybind11 * update static glib version * update macos runner to 12 * install pcre for macos runner * update version to 0.0.8.4 --- .github/workflows/release-github.yaml | 3 ++- CMakeLists.txt | 2 +- Makefile | 14 +++++++------- VERSION | 2 +- cmake/FindGlib2.cmake | 8 ++++---- src/Tokenizer.cpp | 2 +- 6 files changed, 16 insertions(+), 15 deletions(-) diff --git a/.github/workflows/release-github.yaml b/.github/workflows/release-github.yaml index 7c82ace..f325f92 100644 --- a/.github/workflows/release-github.yaml +++ b/.github/workflows/release-github.yaml @@ -24,7 +24,7 @@ jobs: path: dist/*.tar.gz build-wheels-macos: - runs-on: macos-10.15 + runs-on: macos-12 env: ACTIONS_ALLOW_UNSECURE_COMMANDS: "true" MINICONDA_FILENAME: Miniconda3-latest-MacOSX-x86_64.sh @@ -49,6 +49,7 @@ jobs: conda create -n meson python=3.8 conda activate meson conda install -y meson + brew install pcre make download-build-static-deps conda deactivate diff --git a/CMakeLists.txt b/CMakeLists.txt index b6dec52..cc40f2f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -153,7 +153,7 @@ if (BUILD_PYTHON) if (BUILD_SHARED_LIBS) find_package(pybind11 REQUIRED) else() - add_subdirectory(deps/pybind11-2.5.0) + add_subdirectory(deps/pybind11-2.10.3) endif() pybind11_add_module(_mosestokenizer src/python/mosestokenizer.cpp) diff --git a/Makefile b/Makefile index facf59f..36b8894 100644 --- a/Makefile +++ b/Makefile @@ -31,9 +31,9 @@ download-build-static-deps: @mkdir -p deps @echo "Downloading pybind" - curl -L -o deps/pybind-v2.5.0.tar.gz \ - https://github.com/pybind/pybind11/archive/v2.5.0.tar.gz - tar -C deps -xf deps/pybind-v2.5.0.tar.gz + curl -L -o deps/pybind-v2.10.3.tar.gz \ + https://github.com/pybind/pybind11/archive/v2.10.3.tar.gz + tar -C deps -xf deps/pybind-v2.10.3.tar.gz @echo "Downloading and building re2" curl -L -o deps/re2-2020-06-01.tar.gz \ @@ -42,11 +42,11 @@ download-build-static-deps: cd deps/re2-2020-06-01; CXXFLAGS="-fPIC" make @echo "Downloading and building glib2" - curl -L -o deps/glib-2.63.6.tar.gz \ - https://github.com/GNOME/glib/archive/2.63.6.tar.gz - tar -C deps -xf deps/glib-2.63.6.tar.gz + curl -L -o deps/glib-2.72.4.tar.xz \ + https://download.gnome.org/sources/glib/2.72/glib-2.72.4.tar.xz + tar -C deps -xf deps/glib-2.72.4.tar.xz ( \ - cd deps/glib-2.63.6; \ + cd deps/glib-2.72.4; \ meson build --default-library static; \ ninja -C build; \ ) diff --git a/VERSION b/VERSION index 733823d..01e469c 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.0.8.3 +0.0.8.4 diff --git a/cmake/FindGlib2.cmake b/cmake/FindGlib2.cmake index 6786069..2188310 100644 --- a/cmake/FindGlib2.cmake +++ b/cmake/FindGlib2.cmake @@ -30,11 +30,11 @@ else() # BUILD_SHARED_LIBS # Search for static library from deps set( Glib2_INCLUDE_DIRS - ${CMAKE_SOURCE_DIR}/deps/glib-2.63.6 - ${CMAKE_SOURCE_DIR}/deps/glib-2.63.6/glib - ${CMAKE_SOURCE_DIR}/deps/glib-2.63.6/build/glib + ${CMAKE_SOURCE_DIR}/deps/glib-2.72.4 + ${CMAKE_SOURCE_DIR}/deps/glib-2.72.4/glib + ${CMAKE_SOURCE_DIR}/deps/glib-2.72.4/build/glib ) -set(Glib2_LIBRARIES ${CMAKE_SOURCE_DIR}/deps/glib-2.63.6/build/glib/libglib-2.0.a) +set(Glib2_LIBRARIES ${CMAKE_SOURCE_DIR}/deps/glib-2.72.4/build/glib/libglib-2.0.a) add_library(glib-2.0::glib-2.0 STATIC IMPORTED) set(Glib2_FOUND ON) diff --git a/src/Tokenizer.cpp b/src/Tokenizer.cpp index bff6a81..cc88f48 100644 --- a/src/Tokenizer.cpp +++ b/src/Tokenizer.cpp @@ -652,7 +652,7 @@ Tokenizer::protected_tokenize(std::string& text) { // suppress break if it is an non-breaking prefix if (sentence_break_p) { re2::StringPiece pfx(words[ii].substr(0,len-1)); - std::string pfxs(pfx.as_string()); + std::string pfxs(pfx); if (nbpre_gen_set.find(pfxs) != nbpre_gen_set.end()) { // general non-breaking prefix sentence_break_p = false;