Skip to content

Commit

Permalink
Fix Text::truncate() on single words.
Browse files Browse the repository at this point in the history
Fixing this issue required making a small behavior change around how
single unbreakable words are handled. Instead of being entirely omitted
as before, if a text fragment is not breakable, we do an exact slice.
This favors including *some* content over just the ellipsis. While this
is a behavior change, I don't think it's very intuitive that an inexact
truncation will result in no text. This also changes how chains of
entities work as shown in the modified test case.

This method is in pretty rough shape and at some point in the future,
building a more robust HTML munger/tokenizer might be in order if we
continue to get issues reported for how the HTML option works.

Refs #8673
  • Loading branch information
markstory committed Apr 20, 2016
1 parent 228cee7 commit eb9ff41
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 74 deletions.
90 changes: 52 additions & 38 deletions src/Utility/Text.php
Expand Up @@ -608,49 +608,70 @@ public static function truncate($text, $length = 100, array $options = [])
}
}

$truncate .= mb_substr($tag[3], 0, $left + $entitiesLength);
if (!$options['exact']) {
$words = explode(' ', $tag[3]);
// Keep at least one word.
if (count($words) === 1) {
$truncate .= mb_substr($tag[3], 0, $left + $entitiesLength);
} else {
$wordLength = 0;
$addWords = [];
// Append words until the length is crossed.
foreach ($words as $word) {
// Add words until we have enough letters.
if ($wordLength < $left + $entitiesLength) {
$addWords[] = $word;
}
// Include inter-word space.
$wordLength += mb_strlen($word) + 1;
}
$truncate .= implode(' ', $addWords);

// If the string is longer than requested, find the last space and cut there.
$lastSpace = mb_strrpos($truncate, ' ');
if (mb_strlen($truncate) > $totalLength && $lastSpace !== false) {
$remainder = mb_substr($truncate, $lastSpace);
$truncate = mb_substr($truncate, 0, $lastSpace);

// Re-add close tags that were cut off.
preg_match_all('/<\/([a-z]+)>/', $remainder, $droppedTags, PREG_SET_ORDER);
if ($droppedTags) {
foreach ($droppedTags as $closingTag) {
if (!in_array($closingTag[1], $openTags)) {
array_unshift($openTags, $closingTag[1]);
}
}
}
}
}
} else {
$truncate .= mb_substr($tag[3], 0, $left + $entitiesLength);
}
break;
}

$truncate .= $tag[3];

$totalLength += $contentLength;
if ($totalLength >= $length) {
break;
}
}
} else {
if (mb_strlen($text) <= $length) {
return $text;

$truncate .= $options['ellipsis'];

foreach ($openTags as $tag) {
$truncate .= '</' . $tag . '>';
}
$truncate = mb_substr($text, 0, $length - mb_strlen($options['ellipsis']));
return $truncate;
}

if (mb_strlen($text) <= $length) {
return $text;
}
$truncate = mb_substr($text, 0, $length - mb_strlen($options['ellipsis']));

if (!$options['exact']) {
$spacepos = mb_strrpos($truncate, ' ');
if ($options['html']) {
$truncateCheck = mb_substr($truncate, 0, $spacepos);
$lastOpenTag = mb_strrpos($truncateCheck, '<');
$lastCloseTag = mb_strrpos($truncateCheck, '>');
if ($lastOpenTag > $lastCloseTag) {
preg_match_all('/<[\w]+[^>]*>/s', $truncate, $lastTagMatches);
$lastTag = array_pop($lastTagMatches[0]);
$spacepos = mb_strrpos($truncate, $lastTag) + mb_strlen($lastTag);
}
$bits = mb_substr($truncate, $spacepos);
preg_match_all('/<\/([a-z]+)>/', $bits, $droppedTags, PREG_SET_ORDER);
if (!empty($droppedTags)) {
if (!empty($openTags)) {
foreach ($droppedTags as $closingTag) {
if (!in_array($closingTag[1], $openTags)) {
array_unshift($openTags, $closingTag[1]);
}
}
} else {
foreach ($droppedTags as $closingTag) {
$openTags[] = $closingTag[1];
}
}
}
}
$truncate = mb_substr($truncate, 0, $spacepos);

// If truncate still empty, then we don't need to count ellipsis in the cut.
Expand All @@ -660,13 +681,6 @@ public static function truncate($text, $length = 100, array $options = [])
}

$truncate .= $options['ellipsis'];

if ($options['html']) {
foreach ($openTags as $tag) {
$truncate .= '</' . $tag . '>';
}
}

return $truncate;
}

Expand Down
60 changes: 24 additions & 36 deletions tests/TestCase/Utility/TextTest.php
Expand Up @@ -556,7 +556,7 @@ public function testTruncate()
$this->assertSame($this->Text->truncate($text1, 15, ['html' => true]), "The quick brow\xe2\x80\xa6");
$this->assertSame($this->Text->truncate($text1, 15, ['exact' => false, 'html' => true]), "The quick\xe2\x80\xa6");
$this->assertSame($this->Text->truncate($text2, 10, ['html' => true]), "Heiz&ouml;lr&uuml;c\xe2\x80\xa6");
$this->assertSame($this->Text->truncate($text2, 10, ['exact' => false, 'html' => true]), "Heiz&ouml;\xe2\x80\xa6");
$this->assertSame($this->Text->truncate($text2, 10, ['exact' => false, 'html' => true]), "Heiz&ouml;lr&uuml;c\xe2\x80\xa6");
$this->assertSame($this->Text->truncate($text3, 20, ['html' => true]), "<b>&copy; 2005-2007, Cake S\xe2\x80\xa6</b>");
$this->assertSame($this->Text->truncate($text4, 15, ['html' => true]), "<img src=\"mypic.jpg\"> This image ta\xe2\x80\xa6");
$this->assertSame($this->Text->truncate($text4, 45, ['html' => true]), "<img src=\"mypic.jpg\"> This image tag is not XHTML conform!<br><hr/><b>But the\xe2\x80\xa6</b>");
Expand All @@ -576,43 +576,31 @@ public function testTruncate()
'exact' => false,
'html' => true
]);
$expected = '<p><span style="font-size: medium;"><a>...</a></span></p>';
$this->assertEquals($expected, $result);

$text = '<p><span style="font-size: medium;">El biógrafo de Steve Jobs, Walter
Isaacson, explica porqué Jobs le pidió que le hiciera su biografía en
este artículo de El País.</span></p>
<p><span style="font-size: medium;"><span style="font-size:
large;">Por qué Steve era distinto.</span></span></p>
<p><span style="font-size: medium;"><a href="http://www.elpais.com/
articulo/primer/plano/Steve/era/distinto/elpepueconeg/
20111009elpneglse_4/Tes">http://www.elpais.com/articulo/primer/plano/
Steve/era/distinto/elpepueconeg/20111009elpneglse_4/Tes</a></span></p>
<p><span style="font-size: medium;">Ya se ha publicado la biografía de
Steve Jobs escrita por Walter Isaacson "<strong>Steve Jobs by Walter
Isaacson</strong>", aquí os dejamos la dirección de amazon donde
podeís adquirirla.</span></p>
<p><span style="font-size: medium;"><a>http://www.amazon.com/Steve-
Jobs-Walter-Isaacson/dp/1451648537</a></span></p>';
$result = $this->Text->truncate($text, 500, [
'ellipsis' => '... ',
$expected = '<p><span style="font-size: medium;"><a>Iamates...</a></span></p>';
$this->assertEquals($expected, $result);
}

/**
* Test truncate() method with both exact and html.
* @return void
*/
public function testTruncateExactHtml()
{
$text = '<a href="http://example.org">hello</a> world';
$expected = '<a href="http://example.org">hell..</a>';
$result = Text::truncate($text, 6, array(
'ellipsis' => '..',
'exact' => true,
'html' => true
));
$this->assertEquals($expected, $result);

$expected = '<a href="http://example.org">hell..</a>';
$result = Text::truncate($text, 6, array(
'ellipsis' => '..',
'exact' => false,
'html' => true
]);
$expected = '<p><span style="font-size: medium;">El biógrafo de Steve Jobs, Walter
Isaacson, explica porqué Jobs le pidió que le hiciera su biografía en
este artículo de El País.</span></p>
<p><span style="font-size: medium;"><span style="font-size:
large;">Por qué Steve era distinto.</span></span></p>
<p><span style="font-size: medium;"><a href="http://www.elpais.com/
articulo/primer/plano/Steve/era/distinto/elpepueconeg/
20111009elpneglse_4/Tes">http://www.elpais.com/articulo/primer/plano/
Steve/era/distinto/elpepueconeg/20111009elpneglse_4/Tes</a></span></p>
<p><span style="font-size: medium;">Ya se ha publicado la biografía de
Steve Jobs escrita por Walter Isaacson "<strong>Steve Jobs by Walter
Isaacson</strong>", aquí os dejamos la dirección de amazon donde
podeís adquirirla.</span></p>
<p><span style="font-size: medium;"><a>... </a></span></p>';
));
$this->assertEquals($expected, $result);
}

Expand Down

0 comments on commit eb9ff41

Please sign in to comment.