Skip to content

Commit

Permalink
fix segmentation on CJK characters when updating selection
Browse files Browse the repository at this point in the history
  • Loading branch information
chrox committed Jan 16, 2014
1 parent b839459 commit 6284768
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 18 deletions.
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,8 @@ fetchthirdparty:
git apply ../lvfntman-RegisterExternalFont.patch && \
git apply ../lvtinydom-registerEmbeddedFonts.patch && \
git apply ../epubfmt-EmbeddedFontStyleParser.patch && \
git apply ../lvtextfm-punctuation.patch
git apply ../lvtextfm-punctuation.patch && \
git apply ../lvtinydom-CJK-word.patch
# CREngine patch: disable fontconfig
grep USE_FONTCONFIG $(CRENGINE_DIR)/crengine/include/crsetup.h \
&& grep -v USE_FONTCONFIG \
Expand Down
90 changes: 73 additions & 17 deletions kpvcrlib/lvtextfm-punctuation.patch
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,85 @@ index 49125a7..2fd9f6a 100644
int maxw = 0;
for ( int i=0; chars[i]; i++ ) {
int w = getCharWidth( chars[i] );
diff --git a/crengine/include/lvstring.h b/crengine/include/lvstring.h
index 6985cd6..a38c4fa 100644
--- a/crengine/include/lvstring.h
+++ b/crengine/include/lvstring.h
@@ -25,6 +25,14 @@
#define UNICODE_NO_BREAK_SPACE 0x00a0
#define UNICODE_HYPHEN 0x2010
#define UNICODE_NB_HYPHEN 0x2011
+#define UNICODE_CJK_IDEOGRAPHS_BEGIN 0x4e00
+#define UNICODE_CJK_IDEOGRAPHS_END 0x9FFF
+#define UNICODE_CJK_PUNCTUATION_BEGIN 0x3000
+#define UNICODE_CJK_PUNCTUATION_END 0x303F
+#define UNICODE_GENERAL_PUNCTUATION_BEGIN 0x2000
+#define UNICODE_GENERAL_PUNCTUATION_END 0x206F
+#define UNICODE_CJK_PUNCTUATION_HALF_AND_FULL_WIDTH_BEGIN 0xFF01
+#define UNICODE_CJK_PUNCTUATION_HALF_AND_FULL_WIDTH_END 0xFFEE



@@ -91,6 +99,7 @@ int decodeDecimal( const lChar16 * str, int len );
#define CH_PROP_SIGN 0x0100 ///< sign character flag
#define CH_PROP_ALPHA_SIGN 0x0200 ///< alpha sign character flag
#define CH_PROP_DASH 0x0400 ///< minus, emdash, endash, ... (- signs)
+#define CH_PROP_CJK 0x0800 ///< CJK ideographs

/// retrieve character properties mask array for wide c-string
void lStr_getCharProps( const lChar16 * str, int sz, lUInt16 * props );
diff --git a/crengine/src/lvstring.cpp b/crengine/src/lvstring.cpp
index 27adddd..21611a0 100644
--- a/crengine/src/lvstring.cpp
+++ b/crengine/src/lvstring.cpp
@@ -4063,6 +4063,12 @@ inline lUInt16 getCharProp(lChar16 ch) {
return char_props_1f00[ch & 255];
else if (ch>=0x2012 && ch<=0x2015)
return CH_PROP_DASH|CH_PROP_SIGN;
+ else if (ch>=UNICODE_CJK_IDEOGRAPHS_BEGIN && ch<=UNICODE_CJK_IDEOGRAPHS_END)
+ return CH_PROP_CJK;
+ else if ((ch>=UNICODE_CJK_PUNCTUATION_BEGIN && ch<=UNICODE_CJK_PUNCTUATION_END) ||
+ (ch>=UNICODE_GENERAL_PUNCTUATION_BEGIN && ch<=UNICODE_GENERAL_PUNCTUATION_END) ||
+ (ch>=UNICODE_CJK_PUNCTUATION_HALF_AND_FULL_WIDTH_BEGIN && ch<=UNICODE_CJK_PUNCTUATION_HALF_AND_FULL_WIDTH_END))
+ return CH_PROP_PUNCT;
return 0;
}

diff --git a/crengine/src/lvtextfm.cpp b/crengine/src/lvtextfm.cpp
index 57bbf3b..e6f5913 100755
index 57bbf3b..5239f9a 100755
--- a/crengine/src/lvtextfm.cpp
+++ b/crengine/src/lvtextfm.cpp
@@ -712,7 +712,7 @@ public:
} else {
lastWord = true;
}
- if ( i>wstart && (newSrc!=lastSrc || space || lastWord) ) {
+ if ( i>wstart && (newSrc!=lastSrc || space || lastWord || isCJKIdeograph(m_flags[i])) ) {
+ if ( i>wstart && (newSrc!=lastSrc || space || lastWord || isCJKIdeograph(m_text[i])) ) {
// create and add new word
formatted_word_t * word = lvtextAddFormattedWord(frmline);
int b;
@@ -811,7 +811,9 @@ public:
@@ -794,7 +794,10 @@ public:
// CRLog::trace("Double nbsp text[-1]=%04x", m_text[wstart-1]);
// else
frmline->words[frmline->word_count-2].flags |= LTEXT_WORD_CAN_ADD_SPACE_AFTER;
- } if ( m_flags[i-1] & LCHAR_ALLOW_WRAP_AFTER )
+ } else if (frmline->word_count>1 && isCJKIdeograph(m_text[i])) {
+ frmline->words[frmline->word_count-2].flags |= LTEXT_WORD_CAN_ADD_SPACE_AFTER;
+ }
+ if ( m_flags[i-1] & LCHAR_ALLOW_WRAP_AFTER )
word->flags |= LTEXT_WORD_CAN_BREAK_LINE_AFTER;
if ( word->t.start==0 && srcline->flags & LTEXT_IS_LINK )
word->flags |= LTEXT_WORD_IS_LINK_START;
@@ -803,7 +806,7 @@ public:
int endp = i-1;
int lastc = m_text[endp];
int wAlign = font->getVisualAligmentWidth();
- word->width += wAlign;
+ word->width += wAlign/2;
while ( (lastc==' ') && endp>0 ) { // || lastc=='\r' || lastc=='\n'
word->width -= m_widths[endp] - m_widths[endp-1];
endp--;
@@ -811,7 +814,9 @@ public:
}
if ( word->flags & LTEXT_WORD_CAN_HYPH_BREAK_LINE_AFTER ) {
word->width -= font->getHyphenWidth(); // TODO: strange fix - need some other solution
Expand All @@ -35,19 +100,10 @@ index 57bbf3b..e6f5913 100755
int w = font->getCharWidth(lastc);
TR("floating: %c w=%d", lastc, w);
word->width -= w;
@@ -869,6 +871,29 @@ public:
@@ -869,6 +874,20 @@ public:
return 0;
}

+#define UNICODE_CJK_IDEOGRAPHS_BEGIN 0x4e00
+#define UNICODE_CJK_IDEOGRAPHS_END 0x9FFF
+#define UNICODE_CJK_PUNCTUATION_BEGIN 0x3000
+#define UNICODE_CJK_PUNCTUATION_END 0x303F
+#define UNICODE_GENERAL_PUNCTUATION_BEGIN 0x2000
+#define UNICODE_GENERAL_PUNCTUATION_END 0x206F
+#define UNICODE_CJK_PUNCTUATION_HALF_AND_FULL_WIDTH_BEGIN 0xFF01
+#define UNICODE_CJK_PUNCTUATION_HALF_AND_FULL_WIDTH_END 0xFFEE
+
+ bool isCJKIdeograph(lChar16 c) {
+ return c >= UNICODE_CJK_IDEOGRAPHS_BEGIN && c <= UNICODE_CJK_IDEOGRAPHS_END;
+ }
Expand All @@ -65,15 +121,15 @@ index 57bbf3b..e6f5913 100755
/// Split paragraph into lines
void processParagraph( int start, int end )
{
@@ -927,7 +952,6 @@ public:
@@ -927,7 +946,6 @@ public:
}
}
}
- maxWidth -= visialAlignmentWidth;
}
#endif

@@ -944,6 +968,8 @@ public:
@@ -944,6 +962,8 @@ public:
int lastMandatoryWrap = -1;
int spaceReduceWidth = 0; // max total line width which can be reduced by narrowing of spaces
int firstCharMargin = getAdditionalCharWidthOnLeft(pos); // for first italic char with elements below baseline
Expand All @@ -82,7 +138,7 @@ index 57bbf3b..e6f5913 100755
for ( i=pos; i<m_length; i++ ) {
if ( x + m_widths[i]-w0 > maxWidth + spaceReduceWidth - firstCharMargin)
break;
@@ -952,7 +978,7 @@ public:
@@ -952,7 +972,7 @@ public:
lastMandatoryWrap = i;
break;
}
Expand All @@ -91,7 +147,7 @@ index 57bbf3b..e6f5913 100755
lastNormalWrap = i;
else if ( flags & LCHAR_DEPRECATED_WRAP_AFTER )
lastDeprecatedWrap = i;
@@ -1029,6 +1055,32 @@ public:
@@ -1029,6 +1049,32 @@ public:
}
bool needReduceSpace = true; // todo: calculate whether space reducing required
int endp = wrapPos+(lastMandatoryWrap<0 ? 1 : 0);
Expand Down
15 changes: 15 additions & 0 deletions kpvcrlib/lvtinydom-CJK-word.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
diff --git a/crengine/src/lvtinydom.cpp b/crengine/src/lvtinydom.cpp
index fe2b879..df50386 100755
--- a/crengine/src/lvtinydom.cpp
+++ b/crengine/src/lvtinydom.cpp
@@ -6874,6 +6874,10 @@ public:
_list.add( ldomWord( node, beginOfWord, i ) );
beginOfWord = -1;
}
+ if (lGetCharProps(text[i]) == CH_PROP_CJK) {
+ _list.add( ldomWord( node, i, i+1 ) );
+ beginOfWord = -1;
+ }
}
}
/// called for each found node in range

0 comments on commit 6284768

Please sign in to comment.