diff --git a/lcftrans/src/entry.cpp b/lcftrans/src/entry.cpp
index c074167..17a8cd2 100644
--- a/lcftrans/src/entry.cpp
+++ b/lcftrans/src/entry.cpp
@@ -43,3 +43,7 @@ void Entry::write(std::ostream& out) const {
 	write_n(out, original, "msgid");
 	write_n(out, translation, "msgstr");
 }
+
+bool Entry::hasTranslation() const {
+	return !(translation.size() == 1 && translation[0].empty());
+}
diff --git a/lcftrans/src/entry.h b/lcftrans/src/entry.h
index a143658..4d0f7a2 100644
--- a/lcftrans/src/entry.h
+++ b/lcftrans/src/entry.h
@@ -17,8 +17,12 @@ class Entry {
 	std::string context; // msgctxt
 	std::vector<std::string> info; // #.
 	std::string location; // #: // Unused, maybe useful later
+	bool fuzzy = false; // When true write a "#, fuzzy" marker
 
 	void write(std::ostream& out) const;
+
+	/** @return false when the translation is exactly one empty line, true otherwise */
+	bool hasTranslation() const;
 };
 
 #endif
diff --git a/lcftrans/src/main.cpp b/lcftrans/src/main.cpp
index e07e786..89cd745 100644
--- a/lcftrans/src/main.cpp
+++ b/lcftrans/src/main.cpp
@@ -26,34 +26,40 @@
 void DumpLdb(const std::string& filename);
 void DumpLmu(const std::string& filename);
 void DumpLmt(const std::string& filename);
+int MatchMode();
 
 static int print_help(char** argv) {
-	std::cerr << "lcftrans - Translate RPG Maker 2000/2003 projects\n";
+	std::cerr << "lcftrans - Translate RPG Maker 2000/2003 projects\n"; // | <- EOL
 	std::cerr << "Usage: " << argv[0] << " [OPTION...] DIRECTORY [ENCODING]\n";
 	std::cerr << "Required options (one of):\n";
-	std::cerr << " -c, --create Create a new translation\n";
-	std::cerr << " -u, --update Update an existing translation\n";
+	std::cerr << " -c, --create Create a new translation\n";
+	std::cerr << " -u, --update Update an existing translation\n";
+	std::cerr << " -m, --match MDIR Match the translations in MDIR and DIRECTORY. When matched\n";
+	std::cerr << " the original in MDIR becomes the translation of DIRECTORY.\n";
+	std::cerr << " Used to generate translations from games where the trans-\n";
+	std::cerr << " lation is hardcoded in the game files.\n";
 	std::cerr << "\n";
 	std::cerr << "Optional options:\n";
-	std::cerr << " -h, --help This usage message\n";
-	std::cerr << " -o, --output Output directory (default: working directory)\n";
+	std::cerr << " -h, --help This usage message\n";
+	std::cerr << " -o, --output OUTDIR Output directory (default: working directory)\n";
 	std::cerr << "\n";
-	std::cerr << "When not specified the encoding is read from RPG_RT.ini or auto-detected.\n";
+	std::cerr << "When not specified the encoding is read from RPG_RT.ini or auto-detected\n";
 	return 2;
 }
 
 std::string encoding;
 std::string outdir = ".";
+std::string merge_indir;
+std::string indir;
 std::vector<std::pair<std::string, std::string>> source_files;
 std::vector<std::pair<std::string, std::string>> outdir_files;
 std::string ini_file;
 std::string database_file;
+bool create = false;
 bool update = false;
+bool match = false;
 
 int main(int argc, char** argv) {
-	std::string indir;
-	bool create = false;
-
 	if (argc <= 1) {
 		return print_help(argv);
 	}
@@ -62,6 +68,8 @@ int main(int argc, char** argv) {
 	for (int i = 1; i < argc; ++i) {
 		std::string arg = argv[i];
 
+		bool any_mode = create || update || match;
+
 		if ((arg == "--help") || (arg == "-h")) {
 			return print_help(argv);
 		} else if ((arg == "--output") || (arg == "-o")) {
@@ -70,15 +78,26 @@ int main(int argc, char** argv) {
 				++i;
 			}
 		} else if ((arg == "--create") || (arg == "-c")) {
-			if (update) {
+			if (any_mode) {
 				return print_help(argv);
 			}
 			create = true;
 		} else if ((arg == "--update") || (arg == "-u")) {
-			if (create) {
+			if (any_mode) {
 				return print_help(argv);
 			}
 			update = true;
+		} else if ((arg == "--match") || (arg == "-m")) {
+			if (any_mode) {
+				return print_help(argv);
+			}
+			match = true;
+			if (i + 1 < argc) {
+				merge_indir = argv[i + 1];
+				++i;
+			} else {
+				return print_help(argv);
+			}
 		} else {
 			indir = arg;
 			if (i+1 < argc) {
@@ -88,7 +107,7 @@ int main(int argc, char** argv) {
 		}
 	}
 
-	if (indir.empty() || (!update && !create)) {
+	if (indir.empty() || (!update && !create && !match) || outdir == merge_indir) {
 		return print_help(argv);
 	}
 
@@ -101,16 +120,41 @@ int main(int argc, char** argv) {
 	dirHandle = opendir(outdir.c_str());
 	if (!dirHandle) {
-		std::cerr << "Can not access output directory " << outdir << "\n";
+		std::cerr << "Cannot access output directory " << outdir << "\n";
 		return 1;
 	}
 
 	if (update) {
 		while (nullptr != (dirEntry = readdir(dirHandle))) {
 			outdir_files.emplace_back(dirEntry->d_name, Utils::LowerCase(dirEntry->d_name));
 		}
+	} else if (match) {
+		// Also read the source translations
+		closedir(dirHandle);
+		dirHandle = opendir(indir.c_str());
+		if (!dirHandle) {
+			std::cerr << "Cannot access input directory " << indir << "\n";
+			return 1;
+		}
+		while (nullptr != (dirEntry = readdir(dirHandle))) {
+			outdir_files.emplace_back(dirEntry->d_name, Utils::LowerCase(dirEntry->d_name));
+		}
+		closedir(dirHandle);
+		dirHandle = opendir(merge_indir.c_str());
+		if (!dirHandle) {
+			std::cerr << "Cannot access merge input directory " << merge_indir << "\n";
+			return 1;
+		}
+		while (nullptr != (dirEntry = readdir(dirHandle))) {
+			source_files.emplace_back(dirEntry->d_name, Utils::LowerCase(dirEntry->d_name));
+		}
 	}
+
 	closedir(dirHandle);
 
+	if (match) {
+		return MatchMode();
+	}
+
 	dirHandle = opendir(indir.c_str());
 	if (dirHandle) {
 		while (nullptr != (dirEntry = readdir(dirHandle))) {
@@ -258,3 +302,65 @@ void DumpLmt(const std::string& filename) {
 	DumpLmuLmtInner(filename, t, "RPG_RT.lmt");
 }
 
+/**
+ * Match mode: for every .po file in merge_indir that has a file with the same
+ * lowercased name in indir, uses the msgids of the merge_indir file as
+ * translations of the indir file and writes the result to outdir. Entries
+ * that could not be matched are written to a ".unmatched.po" file next to it.
+ * @return Process exit code (always 0)
+ */
+int MatchMode() {
+	std::sort(source_files.begin(), source_files.end(), [](const auto& a, const auto& b) {
+		return a.first < b.first;
+	});
+
+	auto ends_with_po = [](const std::string& src) {
+		std::string po = ".po";
+		if (src.length() >= po.size()) {
+			std::string src_l = Utils::LowerCase(src);
+			return (0 == src_l.compare(src_l.length() - po.length(), po.length(), po));
+		} else {
+			return false;
+		}
+	};
+
+	for (const auto& s : source_files) {
+		if (!ends_with_po(s.first)) {
+			continue;
+		}
+
+		for (const auto& o : outdir_files) {
+			if (s.second == o.second) {
+				Translation src_po = Translation::fromPO(merge_indir + "/" + s.first);
+				Translation dst_po = Translation::fromPO(indir + "/" + o.first);
+				int matched = 0;
+				auto stale = dst_po.Match(src_po, matched);
+				std::cout << "Matching " << o.first << "\n";
+				std::cout << " " << matched << " term" << (matched != 1 ? "s" : "") << " matched\n";
+
+				int fuzzy = 0;
+				for (const auto& e : dst_po.getEntries()) {
+					if (e.fuzzy) {
+						++fuzzy;
+					}
+				}
+				if (fuzzy > 0) {
+					std::string term = fuzzy == 1 ? " term is " : " terms are ";
+					std::cout << " " << fuzzy << term << "fuzzy matched\n";
+				}
+
+				if (!stale.getEntries().empty()) {
+					std::string term = stale.getEntries().size() == 1 ? " term is " : " terms are ";
+					std::cout << " " << stale.getEntries().size() << term << "unmatched\n";
+					std::ofstream outfile(outdir + "/" + o.first.substr(0, o.first.size() - 3) + ".unmatched.po");
+					stale.write(outfile);
+				}
+				std::ofstream outfile(outdir + "/" + o.first);
+				dst_po.write(outfile);
+				break; // Pair found, stop scanning the remaining files
+			}
+		}
+	}
+
+	return 0;
+}
diff --git a/lcftrans/src/translation.cpp b/lcftrans/src/translation.cpp
index e7deb12..74cdfc5 100644
--- a/lcftrans/src/translation.cpp
+++ b/lcftrans/src/translation.cpp
@@ -12,6 +12,7 @@
 #include
 #include
 #include
+#include <regex>
 #include
 #include
 #include
@@ -62,10 +63,14 @@ void Translation::writeEntries(std::ostream& out) {
 				}
 
 				if (!e->info.empty()) {
 					for (const auto& info: e->info) {
 						out << "#. " << info << "\n";
 					}
 				}
+
+				if (e->fuzzy) {
+					out << "#, fuzzy\n";
+				}
 			}
 
 			items[s][0]->write(out);
@@ -114,6 +119,92 @@ Translation Translation::Merge(const Translation& from) {
 	return stale;
 }
 
+Translation Translation::Match(const Translation& from, int& matches) {
+	matches = 0;
+	auto efrom = from.getEntries();
+
+	auto starts_with = [](const std::string& line, const std::string& search) {
+		return line.find(search) == 0;
+	};
+
+	Translation stale;
+	const std::regex re("Line [0-9]+"); // Built once, used by the fuzzy pass
+	for (auto& e_from : efrom) {
+		bool found = false;
+		if (!e_from.context.empty()) {
+			// Match by context first
+			for (auto& e_to : entries) {
+				if (e_to.hasTranslation()) {
+					continue;
+				}
+
+				if (e_from.context == e_to.context) {
+					// Also ensure that the ID matches to reduce false-positive rate
+					std::string info = Utils::Join(e_from.info, '\n');
+					if (starts_with(info, "ID ")) {
+						std::string info_to = Utils::Join(e_to.info, '\n');
+						if (info != info_to) {
+							continue;
+						}
+					}
+
+					e_to.translation = e_from.original;
+					++matches;
+					found = true;
+					break;
+				}
+			}
+		} else {
+			std::string info = Utils::Join(e_from.info, '\n');
+			if (starts_with(info, "ID ")) {
+				// Is an event location identifier
+				// Attempt exact match
+				for (auto& e_to : entries) {
+					if (e_to.hasTranslation()) {
+						continue;
+					}
+
+					std::string info_to = Utils::Join(e_to.info, '\n');
+					if (info == info_to) {
+						e_to.translation = e_from.original;
+						found = true;
+						++matches;
+						break;
+					}
+				}
+				// Attempt fuzzy match (Ignore line number)
+				if (!found) {
+					std::string info_cp = info;
+					info.clear();
+					std::regex_replace(std::back_inserter(info), info_cp.begin(), info_cp.end(), re, "");
+					for (auto& e_to : entries) {
+						if (e_to.hasTranslation()) {
+							continue;
+						}
+
+						std::string info_to_cp = Utils::Join(e_to.info, '\n');
+						std::string info_to;
+						std::regex_replace(std::back_inserter(info_to), info_to_cp.begin(), info_to_cp.end(), re, "");
+						if (info == info_to) {
+							e_to.translation = e_from.original;
+							e_to.fuzzy = true;
+							found = true;
+							++matches;
+							break;
+						}
+					}
+				}
+			}
+		}
+
+		if (!found) {
+			stale.addEntry(e_from);
+		}
+	}
+
+	return stale;
+}
+
 template bool isEventCommandString(const lcf::ContextStructBase&) { return false; }
 bool isEventCommandString(const lcf::ContextStructBase& ctx) { return ctx.name == "string"; }
 
@@ -368,7 +459,7 @@ Translation Translation::fromLMU(const std::string& filename, const std::string&
 
 Translation Translation::fromPO(const std::string& filename) {
 	// Super simple parser.
-	// Only parses msgstr, msgid and msgctx
+	// Only parses msgstr, msgid, msgctx and #.
 
 	Translation t;
 
@@ -429,6 +520,10 @@ Translation Translation::fromPO(const std::string& filename) {
 		return out.str();
 	};
 
+	auto read_msgctx = [&]() {
+		e.context = extract_string(7);
+	};
+
 	auto read_msgstr = [&]() {
 		// Parse multiply lines until empty line or comment
 		std::string msgstr = extract_string(6);
@@ -461,6 +556,34 @@ Translation Translation::fromPO(const std::string& filename) {
 		e.original = Utils::Split(msgid);
 	};
 
+	auto read_info = [&]() {
+		if (line.length() <= 3) {
+			return;
+		}
+
+		// Parse multiple lines until empty line, msgctxt or msgid is encountered
+		e.info.push_back(line.substr(3));
+
+		while (std::getline(in, line, '\n')) {
+			if (line.empty() || starts_with("msgctx") || starts_with("msgid")) {
+				if (starts_with("msgctx")) {
+					read_msgctx();
+				} else if (starts_with("msgid")) {
+					read_msgid();
+				}
+				return;
+			}
+			else if (starts_with("#.")) {
+				if (line.length() > 3) {
+					e.info.push_back(line.substr(3));
+				}
+			} else {
+				std::cerr << "Parse error " << filename << " (" << line << "). Expected #., msgctx or msgid\n";
+				return;
+			}
+		}
+	};
+
 	while (std::getline(in, line, '\n')) {
 		if (!found_header) {
 			if (starts_with("msgstr")) {
@@ -470,9 +593,11 @@ Translation Translation::fromPO(const std::string& filename) {
 		}
 
 		if (!parse_item) {
-			if (starts_with("msgctxt")) {
-				e.context = extract_string(7);
-
+			if (starts_with("#.")) {
+				parse_item = true;
+				read_info();
+			} else if (starts_with("msgctxt")) {
+				read_msgctx();
 				parse_item = true;
 			} else if (starts_with("msgid")) {
 				parse_item = true;
diff --git a/lcftrans/src/translation.h b/lcftrans/src/translation.h
index 2cd674b..c2a2e0c 100644
--- a/lcftrans/src/translation.h
+++ b/lcftrans/src/translation.h
@@ -29,6 +29,19 @@ class Translation
 
 	Translation Merge(const Translation& from);
 
+	/**
+	 * Fills the untranslated entries of this with the msgids of from.
+	 * Entries of from that have a msgctxt are paired by msgctxt (and by
+	 * identical "#." info when that info starts with "ID "). Entries without
+	 * msgctxt are paired by their "#." info when it starts with "ID ": first
+	 * exactly, then with "Line N" removed, which flags the result as fuzzy.
+	 * Entries of this that already have a translation are skipped.
+	 * @param from Translation whose msgids become the translations
+	 * @param matches Receives the number of entries of from that were matched
+	 * @return Entries of from that failed to match
+	 */
+	Translation Match(const Translation& from, int& matches);
+
 	static TranslationLdb fromLDB(const std::string& filename, const std::string& encoding);
 	static Translation fromLMT(const std::string& filename, const std::string& encoding);
 	static Translation fromLMU(const std::string& filename, const std::string& encoding);