Skip to content

Commit

Permalink
Merge branch 'master' into feature/experimental-pybind
Browse files Browse the repository at this point in the history
  • Loading branch information
mingruimingrui committed May 23, 2020
2 parents c0fe610 + d132872 commit b986e85
Show file tree
Hide file tree
Showing 24 changed files with 125 additions and 48 deletions.
5 changes: 4 additions & 1 deletion .appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,11 @@ for:
- SET arch=%platform%
- IF "%platform%"=="x86" SET arch=Win32

- cmake -A%arch% -S. -Bbuild -DCMAKE_INSTALL_PREFIX:PATH=.
- cmake -A%arch% -S. -Bbuild -DCMAKE_INSTALL_PREFIX:PATH=. -DENABLE_GTEST:BOOL=ON -DENABLE_BENCHMARK:BOOL=ON -DCMAKE_BUILD_TYPE=Release
- cmake --build build --config Release --target install
test_script:
- cd build
- ctest --verbose -C Release
after_build:
- 7z a OpenCC.zip build/bin build/include build/lib build/share
artifacts:
Expand Down
4 changes: 4 additions & 0 deletions .npmignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,17 @@
CMakeLists.txt
*.cmake
*.pyc
*.cmd
*.tgz

/.github
/.vscode
/.appveyor.yml
/.clang-format
/.travis.yml
/Makefile
/src/*Test.cpp
/src/*TestBase.cpp
/doc
/data/scheme
/deps/google-benchmark
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ set (PACKAGE_URL https://github.com/BYVoid/Opencc)
set (PACKAGE_BUGREPORT https://github.com/BYVoid/Opencc/issues)
set (OPENCC_VERSION_MAJOR 1)
set (OPENCC_VERSION_MINOR 1)
set (OPENCC_VERSION_REVISION 0)
set (OPENCC_VERSION_REVISION 1)

if (CMAKE_BUILD_TYPE MATCHES Debug)
set (version_suffix .Debug)
Expand Down
10 changes: 10 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
# Change History of OpenCC

## Version 1.1.1

2020年5月22日

* 正式提供[Python](https://pypi.org/project/OpenCC/)接口和TypeScript類型標註。
* 更新動態鏈接庫`SOVERSION`到`1.1`,由於C++內部接口發生變更。
* 進一步改進與Windows MSVC的兼容性。
* 簡化頭文件結構,加快編譯速度。刪除不必要的`using`。
* 修復部分香港標準字。

## Version 1.1.0

2020年5月10日
Expand Down
14 changes: 12 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,24 +118,34 @@ Document 文檔: https://byvoid.github.io/OpenCC/

### Build with CMake

Linux (g++ 4.6 is required) and Mac OS X (clang 3.2 is required):
#### Linux & Mac OS X

g++ 4.6+ or clang 3.2+ is required.

```bash
make
```

Windows Visual Studio:
#### Windows Visual Studio:

```bash
build.cmd
```

### Test 測試

#### Linux & Mac OS X

```
make test
```

#### Windows Visual Studio:

```bash
test.cmd
```

### Benchmark 基準測試

```
Expand Down
2 changes: 1 addition & 1 deletion node/global.gypi
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"variables": {
"opencc_version": "1.1.0"
"opencc_version": "1.1.1"
},
"target_defaults": {
"defines": [
Expand Down
2 changes: 1 addition & 1 deletion package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "opencc",
"version": "1.1.0-3",
"version": "1.1.1",
"description": "Conversion between Traditional and Simplified Chinese",
"author": "Carbo Kuo <byvoid@byvoid.com>",
"license": "Apache-2.0",
Expand Down
3 changes: 3 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,8 @@ def initialize_options(self):
'Development Status :: 5 - Production/Stable',
'Intended Audience :: Developers',
'Intended Audience :: Science/Research',
'Natural Language :: Chinese (Simplified)',
'Natural Language :: Chinese (Traditional)',
'Programming Language :: Python',
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 3',
Expand All @@ -216,6 +218,7 @@ def initialize_options(self):
'Topic :: Software Development :: Libraries',
'Topic :: Software Development :: Libraries :: Python Modules',
'Topic :: Software Development :: Localization',
'Topic :: Text Processing :: Linguistic',
],
license='Apache License 2.0',
keywords=['opencc', 'convert', 'chinese']
Expand Down
18 changes: 17 additions & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ set_target_properties(
OUTPUT_NAME
opencc
VERSION
1.1.0
1.1.1
SOVERSION
1.1
)
Expand All @@ -136,10 +136,26 @@ install(
# Gtest

# Unit tests: when ENABLE_GTEST is true, build one executable per name in the
# UNITTESTS list (defined outside this view) and register each with CTest.
if (ENABLE_GTEST)
if (WIN32)
# Windows only: copy the built gtest / gtest_main library files into this
# directory's binary dir. Presumably this lets the test executables locate
# them at run time -- TODO confirm against the gtest build configuration.
add_custom_target(
copy_gtest_to_src
${CMAKE_COMMAND} -E copy $<TARGET_FILE:gtest> ${CMAKE_CURRENT_BINARY_DIR}
COMMENT "Copy gtest"
)
add_custom_target(
copy_gtest_main_to_src
${CMAKE_COMMAND} -E copy $<TARGET_FILE:gtest_main> ${CMAKE_CURRENT_BINARY_DIR}
COMMENT "Copy gtest_main"
)
endif()

# Each test case is compiled from <name>.cpp, linked against gtest,
# gtest_main and libopencc, and registered as a CTest test of the same name.
foreach(TESTCASE ${UNITTESTS})
add_executable(${TESTCASE} ${TESTCASE}.cpp)
target_link_libraries(${TESTCASE} gtest gtest_main libopencc)
add_test(${TESTCASE} ${TESTCASE})
if (WIN32)
# Make sure the copy targets above run before this test executable is built.
add_dependencies(${TESTCASE} copy_gtest_to_src copy_gtest_main_to_src)
endif()
endforeach()
endif()

Expand Down
4 changes: 2 additions & 2 deletions src/ConfigTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ TEST_F(ConfigTest, ConvertBuffer) {
TEST_F(ConfigTest, NonexistingPath) {
const std::string path = "/opencc/no/such/file/or/directory";
try {
const ConverterPtr converter = config.NewFromFile(path);
const ConverterPtr _ = config.NewFromFile(path);
} catch (FileNotFound& e) {
EXPECT_EQ(path + " not found or not accessible.", e.what());
}
Expand All @@ -67,7 +67,7 @@ TEST_F(ConfigTest, NewFromStringWitoutTrailingSlash) {
(std::istreambuf_iterator<char>()));
std::string pathWithoutTrailingSlash = CMAKE_SOURCE_DIR "/test/config_test";

const ConverterPtr converter =
const ConverterPtr _ =
config.NewFromString(content, pathWithoutTrailingSlash);
}

Expand Down
6 changes: 3 additions & 3 deletions src/DictGroup.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,10 @@ std::vector<const DictEntry*> DictGroup::MatchAllPrefixes(const char* word,
const std::vector<const DictEntry*>& entries =
dict->MatchAllPrefixes(word, len);
for (const auto& entry : entries) {
size_t len = entry->KeyLength();
size_t entryLen = entry->KeyLength();
// If the current length has already result, skip
if (matched.find(len) == matched.end()) {
matched[len] = entry;
if (matched.find(entryLen) == matched.end()) {
matched[entryLen] = entry;
}
}
}
Expand Down
6 changes: 5 additions & 1 deletion src/PhraseExtract.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@

#include "PhraseExtract.hpp"

#ifdef _MSC_VER
#pragma execution_character_set("utf-8")
#endif

namespace opencc {

namespace internal {
Expand Down Expand Up @@ -97,7 +101,7 @@ class PhraseExtract::DictType {
void BuildTrie() {
std::unordered_map<std::string, int> key_item_id_map;
marisa::Keyset keyset;
for (size_t i = 0; i < items.size(); i++) {
for (int i = 0; i < items.size(); i++) {
const auto& key = items[i].first;
key_item_id_map[key.ToString()] = i;
keyset.push_back(key.CString(), key.ByteLength());
Expand Down
2 changes: 1 addition & 1 deletion src/SerializedValues.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ void SerializedValues::ConstructBuffer(std::string* valueBuffer,
for (const std::unique_ptr<DictEntry>& entry : *lexicon) {
assert(entry->NumValues() != 0);
for (const auto& value : entry->Values()) {
*valueTotalLength += value.length() + 1;
*valueTotalLength += static_cast<uint32_t>(value.length()) + 1;
}
}
// Write values to the buffer.
Expand Down
18 changes: 0 additions & 18 deletions src/TestUtilsUTF8.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,24 +22,6 @@

#if defined(_MSC_VER) && _MSC_VER > 1310
// <Windows.h> has to be included at global scope. Including it inside
// `namespace opencc` would declare the Win32 API as opencc::* and, because of
// its include guards, hide it from any later global-scope include.
#include <Windows.h>
#endif // if defined(_MSC_VER) && _MSC_VER > 1310

namespace opencc {

#if defined(_MSC_VER) && _MSC_VER > 1310
// Visual C++ 2005 and later require the source files in UTF-8, and all
// std::strings to be encoded as wchar_t otherwise the std::strings will be
// converted into the local multibyte encoding and cause errors. To use a
// wchar_t as UTF-8, these std::strings then need to be convert back to UTF-8.
#define utf8(str) ConvertToUTF8(L##str)

/// @brief Converts a NUL-terminated wide string to a UTF-8 std::string.
/// @param pStr NUL-terminated wide string, normally the L"..." literal
///             produced by the utf8() macro.
/// @return The UTF-8 bytes without the terminating NUL; an empty string when
///         the input is empty or the conversion fails.
///
/// Declared `inline` because this header may be included from several
/// translation units. The output buffer is sized from the input instead of
/// using a fixed-size static array, so long literals are not lost and calls
/// do not share state.
inline std::string ConvertToUTF8(const wchar_t* pStr) {
  // First pass: query the required size in bytes, including the NUL.
  const int requiredSize =
      WideCharToMultiByte(CP_UTF8, 0, pStr, -1, nullptr, 0, nullptr, nullptr);
  if (requiredSize <= 1) {
    // 0 means failure, 1 means only the terminating NUL (empty input).
    return std::string();
  }

  std::string converted(static_cast<size_t>(requiredSize), '\0');
  const int written = WideCharToMultiByte(CP_UTF8, 0, pStr, -1, &converted[0],
                                          requiredSize, nullptr, nullptr);
  if (written <= 0) {
    return std::string();
  }
  // Drop the terminating NUL that WideCharToMultiByte wrote.
  converted.resize(static_cast<size_t>(written) - 1);
  return converted;
}

#else // if defined(_MSC_VER) && _MSC_VER > 1310
// Visual C++ 2003 and gcc will use the std::string literals as is, so the files
// should be saved as UTF-8. gcc requires the files to not have a UTF-8 BOM.
#define utf8(str) std::string(str)
#endif // if defined(_MSC_VER) && _MSC_VER > 1310

} // namespace opencc
3 changes: 2 additions & 1 deletion src/TextDict.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,8 @@ TextDictPtr TextDict::NewFromDict(const Dict& dict) {
size_t TextDict::KeyMaxLength() const { return maxLength; }

Optional<const DictEntry*> TextDict::Match(const char* word, size_t len) const {
std::unique_ptr<DictEntry> entry(new NoValueDictEntry(word));
std::unique_ptr<DictEntry> entry(
new NoValueDictEntry(std::string(word, len)));
const auto& found = std::lower_bound(lexicon->begin(), lexicon->end(), entry,
DictEntry::UPtrLessThan);
if ((found != lexicon->end()) && ((*found)->Key() == entry->Key())) {
Expand Down
5 changes: 2 additions & 3 deletions src/TextDictTestBase.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,7 @@ class TextDictTestBase : public ::testing::Test {
DictPtr CreateDictForTaiwanVariants() const {
LexiconPtr lexicon(new Lexicon);
lexicon->Add(DictEntryFactory::New(utf8(""), utf8("")));
TextDictPtr textDict(new TextDict(lexicon));
return textDict;
return TextDictPtr(new TextDict(lexicon));
}

DictPtr CreateTaiwanPhraseDict() const {
Expand Down Expand Up @@ -109,7 +108,7 @@ class TextDictTestBase : public ::testing::Test {
EXPECT_EQ(utf8("BYVoid"), entry.Get()->Key());
EXPECT_EQ(utf8("byv"), entry.Get()->GetDefault());

entry = dict->MatchPrefix("清華大學");
entry = dict->MatchPrefix(utf8("清華大學"));
EXPECT_TRUE(!entry.IsNull());
EXPECT_EQ(utf8("清華大學"), entry.Get()->Key());
EXPECT_EQ(utf8("TsinghuaUniversity"), entry.Get()->GetDefault());
Expand Down
4 changes: 1 addition & 3 deletions src/UTF8StringSliceTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class UTF8StringSliceTest : public ::testing::Test {
UTF8StringSliceTest()
: text("天行健,君子以自強不息。地勢坤,君子以厚德載物。"), empty(""){};

const UTF8StringSlice text;
UTF8StringSlice text;
const UTF8StringSlice empty;
};

Expand Down Expand Up @@ -62,7 +62,6 @@ TEST_F(UTF8StringSliceTest, Compare) {
}

TEST_F(UTF8StringSliceTest, MoveRight) {
UTF8StringSlice text = this->text;
text.MoveRight();
EXPECT_EQ(UTF8StringSlice("行健,君子以自強不息。地勢坤,君子以厚德載物。"),
text);
Expand All @@ -75,7 +74,6 @@ TEST_F(UTF8StringSliceTest, MoveRight) {
}

TEST_F(UTF8StringSliceTest, MoveLeft) {
UTF8StringSlice text = this->text;
text.MoveLeft();
EXPECT_EQ(UTF8StringSlice("天行健,君子以自強不息。地勢坤,君子以厚德載物"),
text);
Expand Down
16 changes: 15 additions & 1 deletion src/benchmark/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,18 @@ include_directories(..)

add_executable(performance Performance.cpp)
target_link_libraries(performance benchmark libopencc)
add_test(performance performance)
add_test(BenchmarkTest performance)

if (WIN32)
add_custom_target(
copy_benchmark
${CMAKE_COMMAND} -E copy $<TARGET_FILE:benchmark> ${CMAKE_CURRENT_BINARY_DIR}
COMMENT "Copy benchmark"
)
add_custom_target(
copy_opencc
${CMAKE_COMMAND} -E copy $<TARGET_FILE:libopencc> ${CMAKE_CURRENT_BINARY_DIR}
COMMENT "Copy opencc"
)
add_dependencies(performance copy_benchmark copy_opencc)
endif()
5 changes: 5 additions & 0 deletions src/benchmark/Performance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@
#include <iostream>
#include <memory>
#include <streambuf>

#ifdef _MSC_VER
#include <direct.h>
#else
#include <unistd.h>
#endif

#include "SimpleConverter.hpp"
#include "TestUtilsUTF8.hpp"
Expand Down
14 changes: 7 additions & 7 deletions src/tools/CommandLine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ void ConvertLineByLine() {
fclose(fout);
}

void Convert(std::string inputFileName) {
void Convert(std::string fileName) {
const int BUFFER_SIZE = 1024 * 1024;
static bool bufferInitialized = false;
static std::string buffer;
Expand All @@ -84,20 +84,20 @@ void Convert(std::string inputFileName) {
}

bool needToRemove = false;
if (!outputFileName.IsNull() && inputFileName == outputFileName.Get()) {
if (!outputFileName.IsNull() && fileName == outputFileName.Get()) {
// Special case: input == output
const std::string tempFileName = std::tmpnam(nullptr);
std::ifstream src(inputFileName, std::ios::binary);
std::ifstream src(fileName, std::ios::binary);
std::ofstream dst(tempFileName, std::ios::binary);
dst << src.rdbuf();
dst.close();
inputFileName = tempFileName;
fileName = tempFileName;
needToRemove = true;
}

FILE* fin = fopen(inputFileName.c_str(), "r");
FILE* fin = fopen(fileName.c_str(), "r");
if (!fin) {
throw FileNotFound(inputFileName);
throw FileNotFound(fileName);
}
FILE* fout = GetOutputStream();
while (!feof(fin)) {
Expand Down Expand Up @@ -139,7 +139,7 @@ void Convert(std::string inputFileName) {
fclose(fout);
if (needToRemove) {
// Remove temporary file.
std::remove(inputFileName.c_str());
std::remove(fileName.c_str());
}
}

Expand Down
4 changes: 4 additions & 0 deletions test.cmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
:: Configure, build and install a Debug build with GoogleTest enabled, then
:: run the test suite through ctest. The script's exit code is 1 if the
:: configure or build step fails, otherwise it is the exit code of ctest.
setlocal

cmake -S. -Bbuild -DCMAKE_INSTALL_PREFIX:PATH=. -DENABLE_GTEST:BOOL=ON -DCMAKE_BUILD_TYPE=Debug
:: Do not try to build if configuration failed.
if errorlevel 1 exit /b 1

cmake --build build --config Debug --target install
:: Do not run stale or missing test binaries if the build failed.
if errorlevel 1 exit /b 1

:: pushd/popd instead of cd so the caller is returned to the directory the
:: script was started from and can re-run it.
pushd build
ctest --verbose -C Debug
set TEST_EXIT_CODE=%ERRORLEVEL%
popd

exit /b %TEST_EXIT_CODE%
Loading

0 comments on commit b986e85

Please sign in to comment.