From 9a13fe6c4e22db40f63c008459626df69ec057d6 Mon Sep 17 00:00:00 2001 From: Piero Toffanin Date: Wed, 3 Jan 2024 18:17:07 -0500 Subject: [PATCH] use getline, remove target char limit --- README.md | 2 -- dedup.cpp | 6 ++++-- setup.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 3a728db..1cb1c5d 100644 --- a/README.md +++ b/README.md @@ -28,8 +28,6 @@ Source and target must have the same number of lines. No validation checks are m Duplication checks are only made on the source content. If you want to check for duplicates on the target, simply switch the order of the parameters. -Target lines must be less than 4096 characters in length. - ## Build ```bash diff --git a/dedup.cpp b/dedup.cpp index a9170aa..612fdb0 100644 --- a/dedup.cpp +++ b/dedup.cpp @@ -7,7 +7,9 @@ std::tuple dedup(const std::string &src, const throw std::runtime_error("Unable to open " + src); } - DWORD file_size = GetFileSize(file_handle, NULL); + LARGE_INTEGER li; + GetFileSizeEx(file_handle, &li); + SIZE_T file_size = li.QuadPart; HANDLE file_mapping = CreateFileMapping(file_handle, NULL, PAGE_READONLY, 0, 0, NULL); if (file_mapping == NULL) { CloseHandle(file_handle); @@ -41,7 +43,7 @@ std::tuple dedup(const std::string &src, const std::unordered_multimap lines; - std::ifstream tgt_is(tgt); + std::ifstream tgt_is(tgt, std::ios_base::in | std::ios_base::binary); if (!tgt_is.is_open()) throw std::runtime_error("Cannot open " + tgt); std::string src_out = src + ".dedup"; diff --git a/setup.py b/setup.py index 8157ae2..c093b56 100644 --- a/setup.py +++ b/setup.py @@ -124,7 +124,7 @@ def build_extension(self, ext: CMakeExtension) -> None: setup( name="removedup", - version="1.0.5", + version="1.0.6", author="Piero Toffanin", author_email="pt@masseranolabs.com", url="https://github.com/LibreTranslate/RemoveDUP",