Skip to content

Loading…

fix for #29: Lots of defunct juffed processes #40

Merged
merged 1 commit into from

2 participants

@pvanek
Collaborator

Before this patch, juffed invoked the "enca" binary through a QProcess call. These processes remained in the system as defunct processes until the main juffed process ended; after a week of usage there were thousands of them.

This patch removes the (optional) runtime dependency on the enca binary.
This patch introduces an (optional) compile-time/runtime dependency on libenca. QProcess is no longer used; the library is called directly, which is faster.

Valgrind check for memory leaks passed.

@Mezomish Mezomish merged commit 46166c1 into Mezomish:master
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Commits on Aug 17, 2012
  1. @pvanek
Showing with 88 additions and 32 deletions.
  1. +16 −0 CMakeLists.txt
  2. +72 −32 src/lib/Document.cpp
View
16 CMakeLists.txt
@@ -110,6 +110,19 @@ QT4_WRAP_UI ( juffed_qsci_UIS_H ${juffed_qsci_UIS} )
QT4_WRAP_UI ( juffed_app_UIS_H ${juffed_app_UIS} )
QT4_ADD_TRANSLATION ( juffed_QM ${juffed_TS} )
+
+include(FindPkgConfig)
+# try to find libenca - optional
+pkg_check_modules(ENCA enca)
+if (ENCA_FOUND)
+ add_definitions(-DHAVE_ENCA)
+ include_directories(${ENCA_INCLUDE_DIRS})
+else (ENCA_FOUND)
+ message(WARNING "No enca library found. Building without auto-language-detection")
+endif (ENCA_FOUND)
+
+
+
# include directories
include_directories(
${QT_INCLUDES}
@@ -224,6 +237,9 @@ target_link_libraries ( ${JUFFED}
${QT_LIBRARIES}
${QT_QTNETWORK_LIBRARY}
)
+if (ENCA_FOUND)
+ target_link_libraries( ${JUFFED} ${ENCA_LIBRARIES} )
+endif (ENCA_FOUND)
if ( UNIX )
set(CMAKE_CXX_FLAGS "-Wall -Werror -Wextra")
View
104 src/lib/Document.cpp
@@ -33,6 +33,11 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#include <QTextCodec>
#include <QTimer>
+#ifdef HAVE_ENCA
+#include <enca.h>
+#endif
+
+
QString mapCharset(const QString& encaName) {
if ( encaName == "windows-1251" ) {
return "Windows-1251";
@@ -168,46 +173,81 @@ void Document::setCharset(const QString& charset) {
// updateClone();
}
+#define ENCA_BUFFER_SIZE 500
QString Document::guessCharset(const QString& fileName) {
- QStringList params;
- params << "-m" << fileName;
+ QString output;
+ QFile file(fileName);
+ char buf[ENCA_BUFFER_SIZE];
+
+ if ( !file.open(QFile::ReadOnly) ) {
+ return output;
+ }
+
+ int len = file.read(buf, ENCA_BUFFER_SIZE);
+ file.close();
+
+#ifdef HAVE_ENCA
+ // first check for user defined "locale" in Juffed's prefs
QString lang = MainSettings::get(MainSettings::Language);
- if ( !lang.isEmpty() && lang.compare("auto") != 0 ) {
- params << "-L" << lang.left(2);
+ if (!lang.isEmpty() && lang.compare("auto") != 0 ) {
+ lang = lang.left(2);
+ }
+ else {
+ // on the other side guess the language by default locale
+ lang = QLocale().name().left(2);
+ // there is nospecial language handling for "en" in enca.
+ // So we will fake the encoding to "a default one" = "__"
+ if (lang == "en") lang = "__";
}
- QProcess enca;
- enca.start("enca", params);
- enca.waitForFinished();
-
- QString output = QString(enca.readAllStandardOutput()).simplified();
- if ( !output.isEmpty() ) {
- return mapCharset(output);
+ EncaAnalyser an = enca_analyser_alloc(lang.toAscii().constData());
+ if (!an) {
+ //size_t langcnt;
+ //const char** languages = enca_get_languages(&langcnt);
+ qWarning() << "Cannot allocate ENCA analyzer for" << lang.toAscii().constData();
+ //for (uint i = 0; i < langcnt; i++) {
+ // qDebug() << " lang: " << languages[i];
+ //}
}
else {
- // test for BOM
- QFile file(fileName);
- if ( file.open(QFile::ReadOnly) ) {
- char buf[3];
- int len = file.read(buf, 3);
-
- if ( len == 3 ) {
- unsigned char* uBuf = reinterpret_cast<unsigned char*>(buf);
- if ( uBuf[0] == 0xEF && uBuf[1] == 0xBB && uBuf[2] == 0xBF ) {
- output = "UTF-8";
- }
- else if ( uBuf[0] == 0xFE && uBuf[1] == 0xFF && uBuf[2] == 0x00 ) {
- output = "UTF-16BE";
- }
- else if ( uBuf[0] == 0xFF && uBuf[1] == 0xFE && uBuf[2] == 0x00 ) {
- output = "UTF-16LE";
- }
- }
- file.close();
+
+ // simulate "enca" binary environment
+ enca_set_threshold(an, 1.38);
+ enca_set_multibyte(an, 1);
+ enca_set_ambiguity(an, 1);
+ enca_set_garbage_test(an, 1);
+
+ // we use sized buffer which can split multibyte char in the middle so be polite
+ enca_set_termination_strictness(an, 0);
+
+ EncaEncoding enc = enca_analyse(an, (unsigned char*)buf, len);
+ if (enca_charset_is_known(enc.charset)) {
+ QString output = enca_charset_name(enc.charset, ENCA_NAME_STYLE_ENCA);
+ return mapCharset(output);
+ }
+ else {
+ qWarning() << "Enca cannot find the encodig. Continue tests.";
+ }
+ enca_analyser_free(an);
+ }
+
+#endif
+
+ // test for BOM
+ if ( len >= 3 ) {
+ unsigned char* uBuf = reinterpret_cast<unsigned char*>(buf);
+ if ( uBuf[0] == 0xEF && uBuf[1] == 0xBB && uBuf[2] == 0xBF ) {
+ output = "UTF-8";
+ }
+ else if ( uBuf[0] == 0xFE && uBuf[1] == 0xFF && uBuf[2] == 0x00 ) {
+ output = "UTF-16BE";
+ }
+ else if ( uBuf[0] == 0xFF && uBuf[1] == 0xFE && uBuf[2] == 0x00 ) {
+ output = "UTF-16LE";
}
-
- return output;
}
+
+ return output;
}
void Document::setSearchResults(Juff::SearchResults* results) {
Something went wrong with that request. Please try again.