diff --git a/core/commands.php b/core/commands.php index 01b285b..f03a20b 100644 --- a/core/commands.php +++ b/core/commands.php @@ -21,6 +21,9 @@ class commands /** @var \eb\telegram\core\forum_api */ private $forum_api; + /** @var \eb\telegram\core\formatters */ + private $formatters; + /** * Constructor * @@ -36,6 +39,7 @@ public function __construct(\phpbb\config\config $config, $this->config = $config; $this->language = $language; $this->forum_api = $forum_api; + $this->formatters = new \eb\telegram\core\formatters(/*$config, $language*/); } public function onButtonOutdated($command) @@ -283,7 +287,7 @@ public function onShowTopic($command) $topics = $this->forum_api->selectForumTopics($user_id, $command['forum_id']); $topic_id = $topics[$topic_index]['topic_id'] ?? 0; } - //Permission check needed + //Permission check needed $posts = $this->forum_api->selectTopicPosts($user_id, $topic_id); // Page refers to the page, of the topics-list. $page = $command['page']; @@ -312,7 +316,10 @@ public function onShowTopic($command) // "Title: $title\n"; $text .= $this->language->lang('EBT_TOPIC_TITLE', $title, $viewtopic_url . $topic_id) . PHP_EOL; $text .= $not_approved; - $text .= $post['text']; + //Add an invisible marker, where the text could be split, if the + //full post is too long, to be displayed. + $text .= "\u{200B}\u{200B}"; + $text .= $this->formatters->format_post_for_telegram($post['text']); $readonly = !$post['reply']; $first = false; } else @@ -320,7 +327,7 @@ public function onShowTopic($command) // "$time: Reply from $user\n"; $text .= $this->language->lang('EBT_REPLY_AT_BY', $time, $user) . PHP_EOL; $text .= $not_approved; - $text .= $post['text']; + $text .= $this->formatters->format_post_for_telegram($post['text']); } $text .= PHP_EOL . '___________________________________' . PHP_EOL; } @@ -395,14 +402,14 @@ public function onSaveNewTopic($command) return $this->onShowPermissions($command); } $title = $command['title']; - $content = $this->format_text($command['text'], $command['entities'] ?? array()); + $content = $this->formatters->format_input($command['text'], $command['entities'] ?? array()); $user = $command['user']; $forum_id = $command['forum_id']; - $saved = $this->forum_api->insertNewPost(true, $forum_id, $title, $content, $user); + $topic_id = $this->forum_api->insertNewPost(true, $forum_id, $title, $content, $user); //Reset chat_state to topic-display (for back commands) $this->forum_api->store_telegram_chat_state($command['chat_id'], 0, 'T'); - if ($saved) + if ($topic_id) { // The following post was saved. $text = $this->language->lang('EBT_TOPIC_SAVED') . PHP_EOL; @@ -411,7 +418,9 @@ public function onSaveNewTopic($command) $text .= $this->language->lang('EBT_TOPIC_TITLE', $title, '') . PHP_EOL; $text .= $content; $buttons = array($this->language->lang('EBT_BACK') => 'allForumTopics'); - return [$text, $buttons]; + + $command['topic_id'] = $topic_id; + return $this->onShowTopic($command); } else { return $this->errorOnSave(); @@ -426,16 +435,18 @@ public function onSaveNewPost(&$command) { return $this->onShowPermissions($command); } - $command['text'] = $this->format_text($command['text'], $command['entities'] ?? array()); + $command['text'] = $this->formatters->format_input($command['text'], $command['entities'] ?? array()); $saved = $this->forum_api->insertNewPost(false, $command['forum_id'], $command['topic_id'], $command['text'], $command['user']); //Reset chat_state to topic-display (for back commands) $this->forum_api->store_telegram_chat_state($command['chat_id'], 0, 'T'); - if ($saved === true) + if ($saved) { return $this->onShowTopic($command); } else { - $command['admin_info'] = $saved; + $topic_id = $command['topic_id']; + $username = $command['user']['username']; + $command['admin_info'] = "New post for topic $topic_id could not be saved, by user $username"; return $this->errorOnSave(); } } diff --git a/core/formatters.php b/core/formatters.php new file mode 100644 index 0000000..b895169 --- /dev/null +++ b/core/formatters.php @@ -0,0 +1,275 @@ +config = $config; + $this->language = $language; + } + */ + + /** Format a phpbb-post, such that the formatting information + * is transformed into valid telegram formatting. + */ + public function format_post_for_telegram($text) { + $ent = ENT_SUBSTITUTE | ENT_HTML401; //Don't substitute quotes + //The telegram bot only allows a predefined set of HTLM-Tags. + //The forum posts however surround each opening BBCode with an -Tag (and each closing with an ) + + //At first we remove all these tags and their BBCodes, we do not want to show + //up in telegram. + $bbcode_pattern = '~\[((i|b|u|url|email)(?:(?:=|\s).*)?)\](.*?)\[/\\2]~'; + do + { + $text = preg_replace($bbcode_pattern, '$3', $text, 1, $count); + } while ($count > 0); + + $allowed_tags = ['url', 'img', 'b', 'strong', 'i', 'em', 'u', 'ins', 'strike', 'del', 'a', 'code', 'pre']; + + //Now remove all tags, which we do not allow. (Exception list contains all allowed tags.) + //This would not be necessary for "normal" text. But when the forum sends notifications, + //a lot of additional tags are surrounding the text, BB-Codes, links etc. + //$text = strip_tags($text, $allowed_tags); //Needs php version >= 7.4 + $text = strip_tags($text, '<' . implode('><',$allowed_tags) . '>'); + + //Special handling of the url tag: Replace by + $url_pattern = '~(.*?)~is'; + $text = preg_replace($url_pattern, '$2', $text); + + //Special handling of the img tag: + //Replace the content of the anchor tag with a -Placeholder + $img_pattern = '~\[img](.*?)\[/img]~is'; + $text = preg_replace($img_pattern, '<<IMAGE>>', $text); + + //Add server address to relative links (no http-protokoll) starting with a slash (/) + $rel_url_pattern = '~(.*?)~is'; + $replace = '$2'; + $text = preg_replace($rel_url_pattern, $replace, $text); + + //Add server address to relative links (no http-protokoll) NOT starting with a slash (/) + $rel_url_pattern = '~(.*?)~is'; + $replace = '$2'; + $text = preg_replace($rel_url_pattern, $replace, $text); + + //Add a non printable space (ZWSP) to all forward slashes. + //By that, telegram does not treat the forward slash as the beginning of a command. + //Exclude double // and slashes belonging to html-tags + $text = preg_replace('~([^<]/)([^/])~', "$1\u{200B}$2", $text); + + //Revert this, for all hrefs in anchors. + //By that, telegram does not treat the forward slash as the beginning of a command. + do + { + $text = preg_replace("~()~", '$1$2', $text, 1, $count); + } while ($count > 0); + return $text; + } + + /** Implements the substring function (without length-param) + * such that html-tags are still correctly opened and closed. + * Cut the beginning of a text at the given offset. + * If the offset happens to lay inside a tagged area, + * the possibly cut off start tags are added again to the beginning of the + * text, such that the tags are still opened and closed correctly. + * If the offset would cut a start tag (before the ending >) or + * an end tag into pieces, the $offset is moved behind the closing >. + * By adding the start-tags before the offset, the text-length is increased. + * To ensure, the text is not longer, than it would be expected by the offset, + * the offset is increased, until the total lenght is less or equal + * than mb_strlen($text) - $offset. + */ + public function tag_aware_substr($tagged_text, $offset) { + $tags = $this->parse_tags($tagged_text); + $prefix = ''; + $cut_point = $offset - 1; + do + { + $cut_point++; + $prefix = $this->adapt_cut_point($tags, $cut_point); + + } while ($cut_point - mb_strlen($prefix) < $offset); + return $prefix . mb_substr($tagged_text, $cut_point); + } + + /** For a text, which is to be cut, adapt the cut-point, such + * that it does not cut a start or end tag of the text, and + * return the start-tags, which must be added as prefix, because the + * cut text still contains the corresponding end tags. + */ + public function adapt_cut_point($tags, &$offset) { + $print = array(); + //Move the offset, such that no tag is split + foreach ($tags as $tag) { + if ($offset <= $tag['full_s'] || $offset >= $tag['full_e']) + { + continue; //We are outside the enclosing tags. + } + if ($offset < $tag['full_s'] + mb_strlen($tag['s_tag'])) + { + //Start tag would be cut. Move offset behind start-tag + $start_tag = $tag['s_tag']; + $offset = $tag['full_s'] + mb_strlen($start_tag); + break; //Tags are sequential. Cannot happen again. + } + if ($offset >= $tag['full_e'] - mb_strlen($tag['e_tag'])) + { + //End tag would be cut, move $offset behind the end-tag. + //A pure endtag without previous content also does not make sense, + //therefore, this is also skipped. (>= in the condition above) + $offset = $tag['full_e']; + break; //Tags are sequential. Cannot happen again. + } + } + //Collect the start tags, which must be added to the beginning, such that + //their is no end-tag with missing start tag + $start_tags = array(); + foreach ($tags as $tag) { + if ($offset > $tag['full_s'] && $offset < $tag['full_e']) + { + $start_tag = $tag['s_tag']; + $pos = $tag['full_s']; //keep start order + $start_tags[$pos] = $start_tag; + } + } + ksort($start_tags); + return implode('', $start_tags); + } + + /** Find the start and end of text enclosed in html tags. + * The result is an array containing all tagged texts (alos if nested) + * in the following form: + * array( + * array('full' => full tag enclosed text, + * 'full_s' => offset, where full text starts + * 'full_e' => offset, where full text ends + * 's_tag' => complete start tag (including attributes) + * 'e_tag' => complete end tag + * )) + * In case of self closing tags, e_tag is empty. + */ + public function parse_tags($tagged_text) { + $tag_pattern = "/<([\w]+)([^>]*?)(?:([\s]*\/>)|(?:(>)(?:(?:(?:[^<]*?|<\!\-\-.*?\-\->)|(?R))*)(<\/\\1[\s]*>)))/xsmu"; + $result[] = array('full' => ' ' . $tagged_text, 'full_s' => -1); + for($i = 0; $i < count($result); $i++) { + $full = $result[$i]['full']; + $offset = $result[$i]['full_s']; + $t_count = preg_match_all($tag_pattern, mb_substr($full,1), $matches, PREG_OFFSET_CAPTURE); + if ($t_count) { + $j = 0; + foreach($matches[0] as $match) { + $mb_offset = mb_strlen(substr(mb_substr($full,1), 0, $match[1])); + $result[] = array( + 'full' => $match[0], + 'full_s' => $mb_offset + 1 + $offset, + 'full_e' => $mb_offset + 1 + mb_strlen($match[0]) + $offset, + 's_tag' => '<' . $matches[1][$j][0] . $matches[2][$j][0] . $matches[3][$j][0] . $matches[4][$j][0], + 'e_tag' => $matches[5][$j][0], + ); + $j++; + } + } + } + //Remove the first full text entry + return array_slice($result,1); + } + + /** Format the telegram input by adding bbCodes according to the formatting information, + * which telegram sends as so called entities. + */ + public function format_input($text, $entities) + { + /* Split the text, at every point where a formatting starts or ends into an array. + * Therefore we collect at first all splitpoints, and remove duplicates. + */ + $split_points[] = 0; + foreach ($entities as $entity) + { + $split_points[] = $entity->offset; + $split_points[] = $entity->offset + $entity->length; + } + $split_points = array_unique($split_points); + rsort($split_points); + $chunks = array(); + foreach ($split_points as $point) + { + $chunks[$point] = mb_substr($text, $point); + $text = mb_substr($text, 0, $point); + } + ksort($chunks); + //Sort by end of formatting, such that in case of overlapping formats, the opening tag + //for the format, that gets closed last is placed at first. + usort($entities, function($a, $b) + { + return (($a->offset + $a->length) < ($b->offset + $b->length)) ? -1 : 1; + }); + foreach ($entities as $entity) + { + $bbcode = $this->get_bbcode($entity->type); + if (!$bbcode) + { + continue; + } + $chunks[$entity->offset] = $bbcode . $chunks[$entity->offset]; + } + for ($i = count($entities) - 1; $i >= 0; $i--) + { + $entity = $entities[$i]; + $bbcode = $this->get_bbcode($entity->type, false); + if (!$bbcode) + { + continue; + } + $bbcode_start = $this->get_bbcode($entity->type); + $end = $entity->offset + $entity->length; + if (strpos($chunks[$end], $bbcode_start) === 0) + { + //Remove ending tag immediatly followed by starting tag + $chunks[$end] = substr($chunks[$end], strlen($bbcode_start)); + } else + { + $chunks[$end] = $bbcode . $chunks[$end]; + } + } + //Remove non printable whitespace, which may have been included, when user copies + //a part of a post, where the whitespace was added. (See telegrami_api->htmlentitiesForTelegram) + $text = implode('', $chunks); + $text = str_replace("/\u{200B}", "/", $text); + return $text; + } + + private function get_bbcode($format_type, $start = true) + { + switch ($format_type) + { + case 'bold': return $start ? '[b]' : '[/b]'; + case 'italic': return $start ? '[i]' : '[/i]'; + case 'underline': return $start ? '[u]' : '[/u]'; + case 'code': return $start ? '[code]' : '[/code]'; + case 'pre': return $start ? '[code]' : '[/code]'; + case 'strikethrough': return $start ? '' : ''; + case 'url': return ''; //No need for BBCode + default: return false; + } + } + +} diff --git a/core/forum_api.php b/core/forum_api.php index fdd1af8..c981dc5 100644 --- a/core/forum_api.php +++ b/core/forum_api.php @@ -520,7 +520,8 @@ public function insertNewPost($new_topic, $forum_id, $topic_id_or_title, $text, { $user->data[$prop] = $userOrigData[$prop]; } - return $url ? true : false; + //For new topics, topic_id was set in submit_post. + return $url ? $data['topic_id'] : false; } private function print_formatted($obj) diff --git a/core/telegram_api.php b/core/telegram_api.php index df80330..5c66eb6 100644 --- a/core/telegram_api.php +++ b/core/telegram_api.php @@ -30,6 +30,7 @@ public function __construct(\phpbb\config\config $config, { $this->config = $config; $this->language = $language; + $this->formatters = new \eb\telegram\core\formatters(); } /** Get the name of the bot */ @@ -167,23 +168,27 @@ public function prepareMessage($text, $buttons = false) return $message; } - private function prepareText($org_text) + /** Shorten the text if necessary. Keep the html-tags intact. */ + private function prepareText($text) { - $text = $this->htmlentitiesForTelegram($org_text); - // Return the text from its XML form to its original plain text form - if (strlen($text) >= 4096) + $maxlen = 4096; + if (mb_strlen($text) > $maxlen) { - // Warning: Topic is too long and was cut. Telegram doesn \'t allow more than 4096 characters !', - $pretext = $this->language->lang('EBT_TOPIC_SHORTENED') . PHP_EOL . '...' . PHP_EOL; - $len = 4095 - strlen($pretext); - while (strlen($text) >= 4069) + //Split the text a two consecutive ZWSPs, if found + $splitmarker = "\u{200B}\u{200B}"; + $pos = strpos($text, $splitmarker); + $title = ''; + if ($pos !== false) { - $len--; - $text = mb_substr($org_text, -$len); - //To avoid open tags, we need to encode html-chars again, after the text was shortend - $text = $this->htmlentitiesForTelegram($text); - $text = $pretext . $text; + $title = substr($text, 0, $pos); + $text = substr($text,$pos + strlen($splitmarker)); } + // Warning: Topic is too long and was cut. Telegram doesn \'t allow more than 4096 characters !', + $pretext = $this->language->lang('EBT_TOPIC_SHORTENED') . PHP_EOL . '...' . PHP_EOL; + $remaining_len = $maxlen - mb_strlen($pretext) - mb_strlen($title); + $offset = mb_strlen($text) - $remaining_len; + $text = $this->formatters->tag_aware_substr($text, $offset); + $text = $title . $pretext . $text; } return $text; } @@ -195,12 +200,12 @@ private function prepareText($org_text) private function prepare_button_text($text) { $text = strip_tags($text); + //Button-texts do not need html-encoding + $text = html_entity_decode($text); if (mb_strlen($text) > 24) { $text = mb_substr($text, 0, 20) . ' ...'; //Multibyte-safe cut } - //Button-texts do not need html-encoding - $text = html_entity_decode($text); return $text; } @@ -217,7 +222,7 @@ private function htmlentitiesForTelegram ($text) $allowed_tags_bar_separated = implode('|', $allowed_tags); //Match for opening tags with optional attributes, followed by any text, followed by the same closing tag. //Use https://regexper.com/ to visualize the pattern - //("\\" must be replaced by "\" and "/" by "\/" for this tool ) + //("\\" (backreference) must be replaced by "\" and "/" by "\/" for this tool ) //https://regexper.com/#%26lt%3B%28%28list%7Cof%7Callowed%7Ctags%29%28%3F%3A%28%3F%3A%5Cs%2B%5Cw%2B%3F%28%3F%3A%5Cs*%3D%5Cs*%28%3F%3A%5C%22%5B%5E%5C%22%5D*%5C%22%7C'%5B%5E'%5D*'%29%29%29%2B%5Cs*%7C%5Cs*%29%29%26gt%3B%28.*%3F%29%26lt%3B%5C%2F%5C2%5Cs*%26gt%3B $pattern = "~<(($allowed_tags_bar_separated)(?:(?:\s+\w+?(?:\s*=\s*(?:\"[^\"]*\"|'[^']*')))+\s*|\s*))>(.*?)</\\2\s*>~is"; //Groups: 1: Full tag-content including attributs, 2: tag-name, 3: content between tags. @@ -248,6 +253,7 @@ private function htmlentitiesForTelegram ($text) //Add a non printable space (ZWSP) to all forward slashes, which do not belong to an HTML-Tag. //By that, telegram does not treat the forward slash as the beginning of a command. //$text = preg_replace('~([^<]/)~', "$0\u{200B}", $text); + return $text; } diff --git a/tests/core/format_test.php b/tests/core/format_test.php index 771cfa7..52c4554 100644 --- a/tests/core/format_test.php +++ b/tests/core/format_test.php @@ -17,29 +17,41 @@ /** Test the formatting of telegram input*/ class format_test extends \phpbb_test_case { - /** @var \eb\telegram\core\commands */ - private $commands; + /** @var \eb\telegram\core\formatters */ + private $formatters; public function setUp(): void { parent::setUp(); - $this->commands = $this->getMockBuilder('\eb\telegram\core\commands') - ->disableOriginalConstructor() - ->setMethodsExcept(['format_text']) - ->getMock(); + $this->config = $this->getMockBuilder('\phpbb\config\config') + ->disableOriginalConstructor() + ->getMock(); + //Config entries expected by generate_board_url() + $this->config->expects($this->any()) + ->method('offsetGet') + ->willReturnMap([ //Map param(s) to return value + ['force_server_vars', true], + ['server_protocol', 'http://'], + ['server_name', 'server.name'], + ['server_port', ''], + ['script_path', '/phpbb'], + ['cookie_secure', ''], + ]); + + $this->formatters = new \eb\telegram\core\formatters(); } //no formatting at all - public function test_plain() + public function test_format_input_plain() { $input = "Some text"; - $formatted = $this->commands->format_text($input, array()); + $formatted = $this->formatters->format_input($input, array()); //No change expected; $this->assertEquals($formatted, $input); } //multiple formats one after the other - public function test_sequential() + public function test_format_input_sequential() { $input = 'A bold italic underlined text'; $entities = json_decode( '[{' . @@ -56,12 +68,12 @@ public function test_sequential() '"type": "underline"' . '}]'); $expected = 'A [b]bold[/b] [i]italic[/i] [u]underlined[/u] text'; - $formatted = $this->commands->format_text($input, $entities); + $formatted = $this->formatters->format_input($input, $entities); $this->assertEquals($expected, $formatted); } //Multiple formats nested - public function test_nested() + public function test_format_input_nested() { $input = 'A_bold_italic_underlined_italic_bold_text'; $entities = json_decode('[{' . @@ -90,14 +102,14 @@ public function test_nested() '"type": "underline"' . '}]'); $expected = 'A_[b]bold_[i]italic_[u]underlined_[/u]italic[/i]_bold[/b]_text'; - $formatted = $this->commands->format_text($input, $entities); + $formatted = $this->formatters->format_input($input, $entities); $this->assertEquals($expected, $formatted); } /** Multiple formats overlapping: bold starts, italic starts, bold ends, italic ends. * (Will usually be split already by telegram) */ - public function test_overlapping() + public function test_format_input_overlapping() { $input = 'A bold italic bold_end italic_end text'; $entities = json_decode('[{' . @@ -110,7 +122,7 @@ public function test_overlapping() '"type": "italic"' . '}]'); $expected = 'A [b]bold [i]italic [/b]bold_end [/i]italic_end text'; - $formatted = $this->commands->format_text($input, $entities); + $formatted = $this->formatters->format_input($input, $entities); $this->assertEquals($expected, $formatted); //This is how telegram would send it: @@ -128,13 +140,13 @@ public function test_overlapping() '"type": "bold"' . '}]'); $expected = 'A [b]bold [/b][i][b]italic[/b] bold_end [/i]italic_end text'; - $formatted = $this->commands->format_text($input, $entities); + $formatted = $this->formatters->format_input($input, $entities); $this->assertEquals($expected, $formatted); } /** Test with unicode-characters. */ - public function test_umlauts() + public function test_format_input_umlauts() { $input = 'Check formäätting with ÄÖÜ.'; $input = "Check form\u{00e4}\u{00e4}tting with \u{00c4}\u{00d6}\u{00dc}."; @@ -148,8 +160,141 @@ public function test_umlauts() '"type": "italic"' . '}]'); $expected = 'Check [b]formäätting[/b] with [i]ÄÖÜ[/i].'; - $formatted = $this->commands->format_text($input, $entities); + $formatted = $this->formatters->format_input($input, $entities); $this->assertEquals($expected, $formatted); } + /** Test the formatting of a post with lots of + * different BBCodes. + */ + public function test_format_post_for_telelegram() + { + global $config; + $config = $this->config; + //This is the typical DB-content for a post. + $input = <<<'EOD' +mention -> [quote]Quote something[/quote] +bot_command -> /should not be treated as command
+url1 -> [url=http://google.com]BBCode-Url with text[/url]
+url2 -> [url]http://google.com[/url] (BBCode-Url without text)
+url3 without bbcode: http://google.com
+email -> [email]email@for.you[/email] (Email in BBCode)
+email without BBCode: email@for.you
+[b]bold text[/b]
+[i]italic text[/i]
+[u]underlined text[/u]
+HTML-tags: <strike>Has no effect</strike> +[code]A piece of code[/code] +[img]https://upload.wikimedia.org/wikipedia/commons/4/4a/Dot-yellow.gif[/img]
+Image with relative link1: [img]./styles/moschistyle32/theme/images/Moschifreunde.jpg[/img]
+Image with relative link2: [img]./styles/moschistyle32/theme/images/Moschifreunde.jpg[/img]
+[attachment]an attachment[/attachment]
+[color=red]Red color[/color]
+[size=110]A bit bigger[/size] +[list]
  • Start of List
  • +
  • [*]first list item
  • +[/list]
    +EOD; + $expected = <<BBCode-Url with text
    +url2 -> http://\u{200B}google.com (BBCode-Url without text) +url3 without bbcode: http://\u{200B}google.com +email -> email@for.you (Email in BBCode) +email without BBCode: email@for.you +bold text +italic text +underlined text +HTML-tags: <strike>Has no effect</\u{200B}strike> +[code]A piece of code[/\u{200B}code] +<<IMAGE>> +Image with relative link1: <<IMAGE>> +Image with relative link2: <<IMAGE>> +[attachment]an attachment[/\u{200B}attachment] +[color=red]Red color[/\u{200B}color] +[size=110]A bit bigger[/\u{200B}size] +[list]Start of List +[*]first list item +[/\u{200B}list] +EOD; + $formatted = $this->formatters->format_post_for_telegram($input); + $this->assertEquals($expected, $formatted); + } + + public function test_parse_nested_tags() + { + $input = <<<'EOD' +url1 -> BBCode-Url with text +bold italic (self closing br
    )underlined +with umlauts: ÄÜÖäöüßnested italic text text
    text
    +[code]A piece of code[/\u{200B}code] +EOD; + $tag_info = $this->formatters->parse_tags($input); + $expected = [ + 'BBCode-Url with text', + "bold italic (self closing br
    )underlined \nwith umlauts: ÄÜÖäöüßnested italic text text
    text
    ", + '[code]A piece of code[/\u{200B}code]', + "italic (self closing br
    )underlined \nwith umlauts: ÄÜÖäöüßnested italic text text
    ", + '
    ', + "underlined \nwith umlauts: ÄÜÖäöüßnested italic text", + 'nested italic', + ]; + $full_texts = array_column($tag_info, 'full'); + //$this->assertEquals('', print_r($tag_info, true)); //for output of $tag_info + $this->assertEquals($expected, $full_texts); + } + + public function tag_aware_substr_data_provider() { + return array ( + [ 0, 'url1 -> B'], //whole text + [ 11, 'BBCode-Url w'], //Before a-tag + [ 12, 'BCode-Url wi'], //a-tag would be cut + [ 30, 'tbold'], //1 before end of a-tag + [ 31, 'bold italic (self closing br
    '], //would lead to empty a-tag + [ 33, 'bold italic (self closing br
    '], //inside ending a-tag + [ 35, 'bold italic (self closing br
    '], //just before B-tag + [129, 'ßnested italic text '], //cut the starting i + [130, 'nested italic text t'], //exactly after starting i + [131, 'ested italic text te'], //exactly after starting i + ); + } + + /** @dataProvider tag_aware_substr_data_provider */ + public function test_tag_aware_substr($offset, $expected_text40) + { + $input = <<BBCode-Url with text +bold italic (self closing br
    )underlined with umlauts: ÄÜÖäöüßnested italic text text
    text
    +[code]A piece of code[/\u{200B}code] +EOD; + $input = str_replace("\n", '', $input); + $text = $this->formatters->tag_aware_substr($input, $offset); + $this->assertEquals($expected_text40, mb_substr($text,0,40)); + } + + public function test_tag_aware_substr_len() + { + $input = <<BBCode-Url with text +bold italic (self closing br
    )underlined with umlauts: ÄÜÖäöüßnested italic text text
    text
    +[code]A piece of code[/\u{200B}code] +EOD; + $input = str_replace("\n", '', $input); + for ($i = 0; $i < mb_strlen($input); $i++) + { + $text = $this->formatters->tag_aware_substr($input, $i); + $exp_len = mb_strlen($input) - $i; + $len = mb_strlen($text); + $this->assertLessThanOrEqual($exp_len, $len); + //Putting the a-tag in front of the text, would lead to + //an empty end-a-tag. Thus the offset is moved even + //behind the end-a-tag, which sums up to 31 chars. + $this->assertGreaterThanOrEqual($exp_len, $len+32); + $print[] = "$i: $exp_len -> $len"; + } + //For output of length-info: + //$this->assertEquals('', implode("\n", $print)); + } + }