diff --git a/core/commands.php b/core/commands.php
index 01b285b..f03a20b 100644
--- a/core/commands.php
+++ b/core/commands.php
@@ -21,6 +21,9 @@ class commands
/** @var \eb\telegram\core\forum_api */
private $forum_api;
+ /** @var \eb\telegram\core\formatters */
+ private $formatters;
+
/**
* Constructor
*
@@ -36,6 +39,7 @@ public function __construct(\phpbb\config\config $config,
$this->config = $config;
$this->language = $language;
$this->forum_api = $forum_api;
+ $this->formatters = new \eb\telegram\core\formatters(/*$config, $language*/);
}
public function onButtonOutdated($command)
@@ -283,7 +287,7 @@ public function onShowTopic($command)
$topics = $this->forum_api->selectForumTopics($user_id, $command['forum_id']);
$topic_id = $topics[$topic_index]['topic_id'] ?? 0;
}
- //Permission check needed
+ //Permission check needed
$posts = $this->forum_api->selectTopicPosts($user_id, $topic_id);
// Page refers to the page, of the topics-list.
$page = $command['page'];
@@ -312,7 +316,10 @@ public function onShowTopic($command)
// "Title: $title\n";
$text .= $this->language->lang('EBT_TOPIC_TITLE', $title, $viewtopic_url . $topic_id) . PHP_EOL;
$text .= $not_approved;
- $text .= $post['text'];
+ //Add an invisible marker, where the text could be split, if the
+ //full post is too long, to be displayed.
+ $text .= "\u{200B}\u{200B}";
+ $text .= $this->formatters->format_post_for_telegram($post['text']);
$readonly = !$post['reply'];
$first = false;
} else
@@ -320,7 +327,7 @@ public function onShowTopic($command)
// "$time: Reply from $user\n";
$text .= $this->language->lang('EBT_REPLY_AT_BY', $time, $user) . PHP_EOL;
$text .= $not_approved;
- $text .= $post['text'];
+ $text .= $this->formatters->format_post_for_telegram($post['text']);
}
$text .= PHP_EOL . '___________________________________' . PHP_EOL;
}
@@ -395,14 +402,14 @@ public function onSaveNewTopic($command)
return $this->onShowPermissions($command);
}
$title = $command['title'];
- $content = $this->format_text($command['text'], $command['entities'] ?? array());
+ $content = $this->formatters->format_input($command['text'], $command['entities'] ?? array());
$user = $command['user'];
$forum_id = $command['forum_id'];
- $saved = $this->forum_api->insertNewPost(true, $forum_id, $title, $content, $user);
+ $topic_id = $this->forum_api->insertNewPost(true, $forum_id, $title, $content, $user);
//Reset chat_state to topic-display (for back commands)
$this->forum_api->store_telegram_chat_state($command['chat_id'], 0, 'T');
- if ($saved)
+ if ($topic_id)
{
// The following post was saved.
$text = $this->language->lang('EBT_TOPIC_SAVED') . PHP_EOL;
@@ -411,7 +418,9 @@ public function onSaveNewTopic($command)
$text .= $this->language->lang('EBT_TOPIC_TITLE', $title, '') . PHP_EOL;
$text .= $content;
$buttons = array($this->language->lang('EBT_BACK') => 'allForumTopics');
- return [$text, $buttons];
+
+ $command['topic_id'] = $topic_id;
+ return $this->onShowTopic($command);
} else
{
return $this->errorOnSave();
@@ -426,16 +435,18 @@ public function onSaveNewPost(&$command)
{
return $this->onShowPermissions($command);
}
- $command['text'] = $this->format_text($command['text'], $command['entities'] ?? array());
+ $command['text'] = $this->formatters->format_input($command['text'], $command['entities'] ?? array());
$saved = $this->forum_api->insertNewPost(false, $command['forum_id'], $command['topic_id'], $command['text'], $command['user']);
//Reset chat_state to topic-display (for back commands)
$this->forum_api->store_telegram_chat_state($command['chat_id'], 0, 'T');
- if ($saved === true)
+ if ($saved)
{
return $this->onShowTopic($command);
} else
{
- $command['admin_info'] = $saved;
+ $topic_id = $command['topic_id'];
+ $username = $command['user']['username'];
+ $command['admin_info'] = "New post for topic $topic_id could not be saved, by user $username";
return $this->errorOnSave();
}
}
diff --git a/core/formatters.php b/core/formatters.php
new file mode 100644
index 0000000..b895169
--- /dev/null
+++ b/core/formatters.php
@@ -0,0 +1,275 @@
+config = $config;
+ $this->language = $language;
+ }
+ */
+
+ /** Format a phpbb-post, such that the formatting information
+ * is transformed into valid telegram formatting.
+ *
+ * Steps, in order: drop the i/b/u/url/email BBCodes (their content is kept),
+ * strip every tag that is not in the allow-list, replace [img] BBCodes by a
+ * placeholder and insert a zero-width space (ZWSP) after forward slashes.
+ *
+ * NOTE(review): several patterns and comments below look as if their markup
+ * was lost (the replacement '$2' is combined with patterns that define a
+ * single capture group, and one pattern consists of an empty group only).
+ * Restore them from the original file before relying on this method.
+ *
+ * @param string $text Text of a forum post.
+ * @return string Text prepared for telegram.
+ */
+ public function format_post_for_telegram($text) {
+ $ent = ENT_SUBSTITUTE | ENT_HTML401; //Don't substitute quotes. NOTE(review): $ent is not used anywhere below.
+ //The telegram bot only allows a predefined set of HTML tags.
+ //The forum posts however surround each opening and each closing BBCode with an extra tag.
+ //NOTE(review): the tag names are missing from this comment - presumably s and e tags, confirm.
+
+ //First remove the BBCodes (including their attributes) that should not show
+ //up in telegram; the text between them is kept. Only one replacement is done
+ //per pass and the pass is repeated until nothing matches, so nested BBCodes are unwrapped too.
+ $bbcode_pattern = '~\[((i|b|u|url|email)(?:(?:=|\s).*)?)\](.*?)\[/\\2]~';
+ do
+ {
+ $text = preg_replace($bbcode_pattern, '$3', $text, 1, $count);
+ } while ($count > 0);
+
+ $allowed_tags = ['url', 'img', 'b', 'strong', 'i', 'em', 'u', 'ins', 'strike', 'del', 'a', 'code', 'pre'];
+
+ //Now remove all tags, which we do not allow. (Exception list contains all allowed tags.)
+ //This would not be necessary for "normal" text. But when the forum sends notifications,
+ //a lot of additional tags are surrounding the text, BB-Codes, links etc.
+ //$text = strip_tags($text, $allowed_tags); //Needs php version >= 7.4
+ $text = strip_tags($text, '<' . implode('><',$allowed_tags) . '>');
+
+ //Special handling of the url tag.
+ //NOTE(review): the pattern has one capture group but the replacement uses '$2' - looks truncated, confirm.
+ $url_pattern = '~(.*?)~is';
+ $text = preg_replace($url_pattern, '$2', $text);
+
+ //Special handling of the img tag:
+ //Replace each [img]...[/img] BBCode (including its content) with the placeholder <<IMAGE>>.
+ $img_pattern = '~\[img](.*?)\[/img]~is';
+ $text = preg_replace($img_pattern, '<<IMAGE>>', $text);
+
+ //Add server address to relative links (no http protocol) starting with a slash (/)
+ //NOTE(review): pattern and replacement look truncated - confirm against the original file.
+ $rel_url_pattern = '~(.*?)~is';
+ $replace = '$2';
+ $text = preg_replace($rel_url_pattern, $replace, $text);
+
+ //Add server address to relative links (no http protocol) NOT starting with a slash (/)
+ //NOTE(review): pattern and replacement look truncated - confirm against the original file.
+ $rel_url_pattern = '~(.*?)~is';
+ $replace = '$2';
+ $text = preg_replace($rel_url_pattern, $replace, $text);
+
+ //Add a non printable space (ZWSP) after forward slashes.
+ //By that, telegram does not treat the forward slash as the beginning of a command.
+ //Not touched: a slash directly after "<" (closing html-tags) and a slash followed by another slash.
+ $text = preg_replace('~([^<]/)([^/])~', "$1\u{200B}$2", $text);
+
+ //Revert this, for all hrefs in anchors (presumably because a ZWSP inside an href would break the link).
+ //NOTE(review): as written, "~()~" matches the empty string, so $count never becomes 0 and the loop cannot end - restore the original pattern.
+ do
+ {
+ $text = preg_replace("~()~", '$1$2', $text, 1, $count);
+ } while ($count > 0);
+ return $text;
+ }
+
+ /** Return the end of a tagged text, starting at (or shortly after) $offset,
+  * without breaking its html tags.
+  *
+  * The cut point is never placed inside a start tag or an end tag. Start tags
+  * whose end tag is still part of the result are put in front of the result
+  * again. Because this prefix makes the result longer, the cut point is moved
+  * forward one character at a time, until the result has at most
+  * mb_strlen($tagged_text) - $offset characters.
+  *
+  * @param string $tagged_text Text containing html tags.
+  * @param int    $offset      Number of leading characters to drop (characters, not bytes).
+  * @return string The re-opened start tags, followed by the remaining text.
+  */
+ public function tag_aware_substr($tagged_text, $offset)
+ {
+     $tag_list = $this->parse_tags($tagged_text);
+     $position = $offset;
+     while (true)
+     {
+         //adapt_cut_point() may push $position forward (passed by reference).
+         $reopened = $this->adapt_cut_point($tag_list, $position);
+         if ($position - mb_strlen($reopened) >= $offset)
+         {
+             //The prefix is compensated by the additionally dropped characters.
+             break;
+         }
+         $position++;
+     }
+     return $reopened . mb_substr($tagged_text, $position);
+ }
+
+ /** Adjust a cut point, such that it does not split a tag, and tell which
+  * start tags have to be repeated in front of the cut text.
+  *
+  * @param array $tags   Tag descriptions as returned by parse_tags()
+  *                      (keys used: full_s, full_e, s_tag, e_tag).
+  * @param int   $offset Cut point in characters; passed by reference and moved
+  *                      behind a start tag or end tag it would otherwise split.
+  * @return string All start tags that enclose the (adjusted) cut point,
+  *                concatenated in the order in which they start in the text.
+  */
+ public function adapt_cut_point($tags, &$offset)
+ {
+     //Pass 1: move the cut point out of the first tag it would split.
+     foreach ($tags as $tag)
+     {
+         $first = $tag['full_s'];
+         $last = $tag['full_e'];
+         if ($offset > $first && $offset < $last)
+         {
+             $content_start = $first + mb_strlen($tag['s_tag']);
+             if ($offset < $content_start)
+             {
+                 //The start tag would be cut: continue directly behind it.
+                 $offset = $content_start;
+                 break; //Tags are sequential. Cannot happen again.
+             }
+             if ($offset >= $last - mb_strlen($tag['e_tag']))
+             {
+                 //The end tag would be cut, or nothing but the end tag would
+                 //be left (hence >=): continue behind the end tag.
+                 $offset = $last;
+                 break; //Tags are sequential. Cannot happen again.
+             }
+         }
+     }
+     //Pass 2: every tag that still encloses the cut point needs its start tag
+     //repeated, otherwise an end tag without start tag would remain.
+     $reopen = array();
+     foreach ($tags as $tag)
+     {
+         if ($tag['full_s'] < $offset && $offset < $tag['full_e'])
+         {
+             //Keyed by start position, to restore the opening order below.
+             $reopen[$tag['full_s']] = $tag['s_tag'];
+         }
+     }
+     ksort($reopen);
+     return implode('', $reopen);
+ }
+
+ /** Locate every html-tagged span of a text, including nested ones.
+  *
+  * Each entry of the returned list describes one span:
+  *   'full'   => the complete span, from start tag to end tag
+  *   'full_s' => character offset, where the span starts in $tagged_text
+  *   'full_e' => character offset, directly behind the span
+  *   's_tag'  => complete start tag (including attributes)
+  *   'e_tag'  => complete end tag; empty for self closing tags
+  *
+  * The list is filled like a work queue: the spans found in the whole text
+  * come first, then each span is searched for the spans nested inside it.
+  *
+  * @param string $tagged_text Text containing html tags.
+  * @return array List of span descriptions.
+  */
+ public function parse_tags($tagged_text)
+ {
+     $tag_pattern = "/<([\w]+)([^>]*?)(?:([\s]*\/>)|(?:(>)(?:(?:(?:[^<]*?|<\!\-\-.*?\-\->)|(?R))*)(<\/\\1[\s]*>)))/xsmu";
+     //Seed entry: the whole text. The first character of every entry is skipped
+     //when searching (for found spans this is their own "<", so a span does not
+     //match itself again); the seed gets a dummy character and starts at -1.
+     $queue = array(array('full' => ' ' . $tagged_text, 'full_s' => -1));
+     for ($idx = 0; $idx < count($queue); $idx++)
+     {
+         $inner = mb_substr($queue[$idx]['full'], 1);
+         $base = $queue[$idx]['full_s'] + 1;
+         if (!preg_match_all($tag_pattern, $inner, $found, PREG_OFFSET_CAPTURE))
+         {
+             continue;
+         }
+         foreach ($found[0] as $n => $hit)
+         {
+             //preg reports byte offsets: convert to a character offset.
+             $start = $base + mb_strlen(substr($inner, 0, $hit[1]));
+             $queue[] = array(
+                 'full' => $hit[0],
+                 'full_s' => $start,
+                 'full_e' => $start + mb_strlen($hit[0]),
+                 's_tag' => '<' . $found[1][$n][0] . $found[2][$n][0] . $found[3][$n][0] . $found[4][$n][0],
+                 'e_tag' => $found[5][$n][0],
+             );
+         }
+     }
+     //Remove the seed entry
+     return array_slice($queue, 1);
+ }
+
+ /** Format the telegram input by adding BBCodes according to the formatting information,
+  * which telegram sends as so called entities.
+  *
+  * @param string $text     Message text received from telegram (UTF-8).
+  * @param array  $entities List of entity objects; offset, length and type are read.
+  * @return string The text with BBCodes inserted.
+  */
+ public function format_input($text, $entities)
+ {
+     //Telegram counts offset and length in UTF-16 code units, not in characters.
+     //Splitting is therefore done on a UTF-16LE copy (2 bytes per code unit), so that
+     //characters outside the BMP (most emoji) do not shift all following formats.
+     $utf16 = mb_convert_encoding((string) $text, 'UTF-16LE', 'UTF-8');
+
+     /* Split the text, at every point where a formatting starts or ends into an array.
+      * Therefore we collect at first all splitpoints, and remove duplicates.
+      */
+     $split_points = array(0);
+     foreach ($entities as $entity)
+     {
+         $split_points[] = $entity->offset;
+         $split_points[] = $entity->offset + $entity->length;
+     }
+     $split_points = array_unique($split_points);
+     //Cut from the end, such that the remaining offsets stay valid.
+     rsort($split_points);
+     $chunks = array();
+     foreach ($split_points as $point)
+     {
+         $byte_pos = 2 * $point;
+         $chunks[$point] = mb_convert_encoding((string) substr($utf16, $byte_pos), 'UTF-8', 'UTF-16LE');
+         $utf16 = (string) substr($utf16, 0, $byte_pos);
+     }
+     ksort($chunks);
+     //Sort by end of formatting, such that in case of overlapping formats, the opening tag
+     //for the format, that gets closed last is placed at first.
+     //Formats ending at the same point: the one starting later is the inner one and
+     //must be closed first, so it is sorted to the front. (Without this tie-break the
+     //order depended on the order of the input and could produce crossed tags.)
+     usort($entities, function($a, $b)
+     {
+         $by_end = ($a->offset + $a->length) <=> ($b->offset + $b->length);
+         return $by_end ?: ($b->offset <=> $a->offset);
+     });
+     foreach ($entities as $entity)
+     {
+         $bbcode = $this->get_bbcode($entity->type);
+         if (!$bbcode)
+         {
+             continue;
+         }
+         $chunks[$entity->offset] = $bbcode . $chunks[$entity->offset];
+     }
+     //Closing tags are prepended in reverse order: the format ending (or starting) last
+     //is handled first, and thus ends up to the right of the ones handled later.
+     for ($i = count($entities) - 1; $i >= 0; $i--)
+     {
+         $entity = $entities[$i];
+         $bbcode = $this->get_bbcode($entity->type, false);
+         if (!$bbcode)
+         {
+             continue;
+         }
+         $bbcode_start = $this->get_bbcode($entity->type);
+         $end = $entity->offset + $entity->length;
+         if (strpos($chunks[$end], $bbcode_start) === 0)
+         {
+             //Remove ending tag immediately followed by starting tag
+             //(BBCodes are plain ASCII, byte functions are safe here).
+             $chunks[$end] = substr($chunks[$end], strlen($bbcode_start));
+         } else
+         {
+             $chunks[$end] = $bbcode . $chunks[$end];
+         }
+     }
+     //Remove non printable whitespace, which may have been included, when user copies
+     //a part of a post, where the whitespace was added after a slash
+     //(see format_post_for_telegram).
+     $text = implode('', $chunks);
+     $text = str_replace("/\u{200B}", "/", $text);
+     return $text;
+ }
+
+ /** Look up the BBCode belonging to a telegram entity type.
+  *
+  * @param string $format_type Entity type as sent by telegram (e.g. 'bold').
+  * @param bool   $start       true for the opening BBCode, false for the closing one.
+  * @return string|false The BBCode, an empty string for types that need no
+  *                      BBCode, or false for unknown types.
+  */
+ private function get_bbcode($format_type, $start = true)
+ {
+     //type => [opening, closing]; checked in this order.
+     $bbcodes = array(
+         'bold' => array('[b]', '[/b]'),
+         'italic' => array('[i]', '[/i]'),
+         'underline' => array('[u]', '[/u]'),
+         'code' => array('[code]', '[/code]'),
+         'pre' => array('[code]', '[/code]'),
+         //NOTE(review): both values are empty - looks like the markup was lost; confirm.
+         'strikethrough' => array('', ''),
+         'url' => array('', ''), //No need for BBCode
+     );
+     foreach ($bbcodes as $type => $pair)
+     {
+         //Loose comparison, as done by a switch statement.
+         if ($format_type == $type)
+         {
+             return $start ? $pair[0] : $pair[1];
+         }
+     }
+     return false;
+ }
+
+}
diff --git a/core/forum_api.php b/core/forum_api.php
index fdd1af8..c981dc5 100644
--- a/core/forum_api.php
+++ b/core/forum_api.php
@@ -520,7 +520,8 @@ public function insertNewPost($new_topic, $forum_id, $topic_id_or_title, $text,
{
$user->data[$prop] = $userOrigData[$prop];
}
- return $url ? true : false;
+ //For new topics, topic_id was set in submit_post.
+ return $url ? $data['topic_id'] : false;
}
private function print_formatted($obj)
diff --git a/core/telegram_api.php b/core/telegram_api.php
index df80330..5c66eb6 100644
--- a/core/telegram_api.php
+++ b/core/telegram_api.php
@@ -30,6 +30,7 @@ public function __construct(\phpbb\config\config $config,
{
$this->config = $config;
$this->language = $language;
+ $this->formatters = new \eb\telegram\core\formatters();
}
/** Get the name of the bot */
@@ -167,23 +168,27 @@ public function prepareMessage($text, $buttons = false)
return $message;
}
- private function prepareText($org_text)
+ /** Shorten texts longer than 4096 characters by cutting off the beginning of the body; html-tags are kept intact by tag_aware_substr(). */
+ private function prepareText($text)
{
- $text = $this->htmlentitiesForTelegram($org_text);
- // Return the text from its XML form to its original plain text form
- if (strlen($text) >= 4096)
+ $maxlen = 4096;
+ if (mb_strlen($text) > $maxlen)
{
- // Warning: Topic is too long and was cut. Telegram doesn \'t allow more than 4096 characters !',
- $pretext = $this->language->lang('EBT_TOPIC_SHORTENED') . PHP_EOL . '...' . PHP_EOL;
- $len = 4095 - strlen($pretext);
- while (strlen($text) >= 4069)
+ //Split the text at two consecutive ZWSPs, if found: the part before the marker (the title) is kept unshortened.
+ $splitmarker = "\u{200B}\u{200B}";
+ $pos = strpos($text, $splitmarker);
+ $title = '';
+ if ($pos !== false)
{
- $len--;
- $text = mb_substr($org_text, -$len);
- //To avoid open tags, we need to encode html-chars again, after the text was shortend
- $text = $this->htmlentitiesForTelegram($text);
- $text = $pretext . $text;
+ $title = substr($text, 0, $pos);
+ $text = substr($text,$pos + strlen($splitmarker));
}
+ // Language entry, roughly: "Warning: Topic is too long and was cut. Telegram doesn't allow more than 4096 characters!"
+ $pretext = $this->language->lang('EBT_TOPIC_SHORTENED') . PHP_EOL . '...' . PHP_EOL;
+ $remaining_len = $maxlen - mb_strlen($pretext) - mb_strlen($title);
+ $offset = mb_strlen($text) - $remaining_len;
+ $text = $this->formatters->tag_aware_substr($text, $offset);
+ $text = $title . $pretext . $text;
}
return $text;
}
@@ -195,12 +200,12 @@ private function prepareText($org_text)
private function prepare_button_text($text)
{
$text = strip_tags($text);
+ //Button-texts do not need html-encoding: decode first, so that the length check below counts decoded characters.
+ $text = html_entity_decode($text);
if (mb_strlen($text) > 24)
{
$text = mb_substr($text, 0, 20) . ' ...'; //Multibyte-safe cut
}
- //Button-texts do not need html-encoding
- $text = html_entity_decode($text);
return $text;
}
@@ -217,7 +222,7 @@ private function htmlentitiesForTelegram ($text)
$allowed_tags_bar_separated = implode('|', $allowed_tags);
//Match for opening tags with optional attributes, followed by any text, followed by the same closing tag.
//Use https://regexper.com/ to visualize the pattern
- //("\\" must be replaced by "\" and "/" by "\/" for this tool )
+ //("\\" (backreference) must be replaced by "\" and "/" by "\/" for this tool )
//https://regexper.com/#%26lt%3B%28%28list%7Cof%7Callowed%7Ctags%29%28%3F%3A%28%3F%3A%5Cs%2B%5Cw%2B%3F%28%3F%3A%5Cs*%3D%5Cs*%28%3F%3A%5C%22%5B%5E%5C%22%5D*%5C%22%7C'%5B%5E'%5D*'%29%29%29%2B%5Cs*%7C%5Cs*%29%29%26gt%3B%28.*%3F%29%26lt%3B%5C%2F%5C2%5Cs*%26gt%3B
$pattern = "~<(($allowed_tags_bar_separated)(?:(?:\s+\w+?(?:\s*=\s*(?:\"[^\"]*\"|'[^']*')))+\s*|\s*))>(.*?)</\\2\s*>~is";
//Groups: 1: Full tag-content including attributs, 2: tag-name, 3: content between tags.
@@ -248,6 +253,7 @@ private function htmlentitiesForTelegram ($text)
//Add a non printable space (ZWSP) to all forward slashes, which do not belong to an HTML-Tag.
//By that, telegram does not treat the forward slash as the beginning of a command.
//$text = preg_replace('~([^<]/)~', "$0\u{200B}", $text);
+
return $text;
}
diff --git a/tests/core/format_test.php b/tests/core/format_test.php
index 771cfa7..52c4554 100644
--- a/tests/core/format_test.php
+++ b/tests/core/format_test.php
@@ -17,29 +17,41 @@
/** Test the formatting of telegram input*/
class format_test extends \phpbb_test_case
{
- /** @var \eb\telegram\core\commands */
- private $commands;
+ /** @var \eb\telegram\core\formatters */
+ private $formatters;
public function setUp(): void
{
parent::setUp();
- $this->commands = $this->getMockBuilder('\eb\telegram\core\commands')
- ->disableOriginalConstructor()
- ->setMethodsExcept(['format_text'])
- ->getMock();
+ $this->config = $this->getMockBuilder('\phpbb\config\config')
+ ->disableOriginalConstructor()
+ ->getMock();
+ //Config entries expected by generate_board_url()
+ $this->config->expects($this->any())
+ ->method('offsetGet')
+ ->willReturnMap([ //Map param(s) to return value
+ ['force_server_vars', true],
+ ['server_protocol', 'http://'],
+ ['server_name', 'server.name'],
+ ['server_port', ''],
+ ['script_path', '/phpbb'],
+ ['cookie_secure', ''],
+ ]);
+
+ $this->formatters = new \eb\telegram\core\formatters();
}
//no formatting at all
- public function test_plain()
+ public function test_format_input_plain()
{
$input = "Some text";
- $formatted = $this->commands->format_text($input, array());
+ $formatted = $this->formatters->format_input($input, array());
//No change expected;
$this->assertEquals($formatted, $input);
}
//multiple formats one after the other
- public function test_sequential()
+ public function test_format_input_sequential()
{
$input = 'A bold italic underlined text';
$entities = json_decode( '[{' .
@@ -56,12 +68,12 @@ public function test_sequential()
'"type": "underline"' .
'}]');
$expected = 'A [b]bold[/b] [i]italic[/i] [u]underlined[/u] text';
- $formatted = $this->commands->format_text($input, $entities);
+ $formatted = $this->formatters->format_input($input, $entities);
$this->assertEquals($expected, $formatted);
}
//Multiple formats nested
- public function test_nested()
+ public function test_format_input_nested()
{
$input = 'A_bold_italic_underlined_italic_bold_text';
$entities = json_decode('[{' .
@@ -90,14 +102,14 @@ public function test_nested()
'"type": "underline"' .
'}]');
$expected = 'A_[b]bold_[i]italic_[u]underlined_[/u]italic[/i]_bold[/b]_text';
- $formatted = $this->commands->format_text($input, $entities);
+ $formatted = $this->formatters->format_input($input, $entities);
$this->assertEquals($expected, $formatted);
}
/** Multiple formats overlapping: bold starts, italic starts, bold ends, italic ends.
* (Will usually be split already by telegram)
*/
- public function test_overlapping()
+ public function test_format_input_overlapping()
{
$input = 'A bold italic bold_end italic_end text';
$entities = json_decode('[{' .
@@ -110,7 +122,7 @@ public function test_overlapping()
'"type": "italic"' .
'}]');
$expected = 'A [b]bold [i]italic [/b]bold_end [/i]italic_end text';
- $formatted = $this->commands->format_text($input, $entities);
+ $formatted = $this->formatters->format_input($input, $entities);
$this->assertEquals($expected, $formatted);
//This is how telegram would send it:
@@ -128,13 +140,13 @@ public function test_overlapping()
'"type": "bold"' .
'}]');
$expected = 'A [b]bold [/b][i][b]italic[/b] bold_end [/i]italic_end text';
- $formatted = $this->commands->format_text($input, $entities);
+ $formatted = $this->formatters->format_input($input, $entities);
$this->assertEquals($expected, $formatted);
}
/** Test with unicode-characters.
*/
- public function test_umlauts()
+ public function test_format_input_umlauts()
{
$input = 'Check formäätting with ÄÖÜ.';
$input = "Check form\u{00e4}\u{00e4}tting with \u{00c4}\u{00d6}\u{00dc}.";
@@ -148,8 +160,141 @@ public function test_umlauts()
'"type": "italic"' .
'}]');
$expected = 'Check [b]formäätting[/b] with [i]ÄÖÜ[/i].';
- $formatted = $this->commands->format_text($input, $entities);
+ $formatted = $this->formatters->format_input($input, $entities);
$this->assertEquals($expected, $formatted);
}
+ /** Run a post with lots of different BBCodes, plain links and html-tags
+ * through format_post_for_telegram() and compare the result with the expected text.
+ * NOTE(review): the start of the $expected heredoc below looks damaged (markup lost) - confirm against the original file.
+ */
+ public function test_format_post_for_telelegram()
+ {
+ //Publish the mocked config (see setUp) as the global $config.
+ global $config;
+ $config = $this->config;
+ //This is the typical DB-content for a post.
+ $input = <<<'EOD'
+mention -> [quote]Quote something[/quote]
+bot_command -> /should not be treated as command
+url1 -> [url=http://google.com]BBCode-Url with text[/url]
+url2 -> [url]http://google.com[/url] (BBCode-Url without text)
+url3 without bbcode: http://google.com
+email -> [email]email@for.you[/email] (Email in BBCode)
+email without BBCode: email@for.you
+[b]bold text[/b]
+[i]italic text[/i]
+[u]underlined text[/u]
+HTML-tags: <strike>Has no effect</strike>
+[code]A piece of code[/code]
+[img]https://upload.wikimedia.org/wikipedia/commons/4/4a/Dot-yellow.gif[/img]
+Image with relative link1: [img]./styles/moschistyle32/theme/images/Moschifreunde.jpg[/img]
+Image with relative link2: [img]./styles/moschistyle32/theme/images/Moschifreunde.jpg[/img]
+[attachment]an attachment[/attachment]
+[color=red]Red color[/color]
+[size=110]A bit bigger[/size]
+[list]Start of List
+[*]first list item
+[/list]
+EOD;
+ $expected = <<BBCode-Url with text
+url2 -> http://\u{200B}google.com (BBCode-Url without text)
+url3 without bbcode: http://\u{200B}google.com
+email -> email@for.you (Email in BBCode)
+email without BBCode: email@for.you
+bold text
+italic text
+underlined text
+HTML-tags: <strike>Has no effect</\u{200B}strike>
+[code]A piece of code[/\u{200B}code]
+<<IMAGE>>
+Image with relative link1: <<IMAGE>>
+Image with relative link2: <<IMAGE>>
+[attachment]an attachment[/\u{200B}attachment]
+[color=red]Red color[/\u{200B}color]
+[size=110]A bit bigger[/\u{200B}size]
+[list]Start of List
+[*]first list item
+[/\u{200B}list]
+EOD;
+ $formatted = $this->formatters->format_post_for_telegram($input);
+ $this->assertEquals($expected, $formatted);
+ }
+
+ /** parse_tags() must report every tagged span of the input; only the
+ * 'full' text of each reported span is compared, in the order returned.
+ * NOTE(review): the html-tags of the fixture strings below appear to be lost - confirm against the original file.
+ */
+ public function test_parse_nested_tags()
+ {
+ $input = <<<'EOD'
+url1 -> BBCode-Url with text
+bold italic (self closing br
)underlined
+with umlauts: ÄÜÖäöüßnested italic text text text
+[code]A piece of code[/\u{200B}code]
+EOD;
+ $tag_info = $this->formatters->parse_tags($input);
+ $expected = [
+ 'BBCode-Url with text',
+ "bold italic (self closing br
)underlined \nwith umlauts: ÄÜÖäöüßnested italic text text text",
+ '[code]A piece of code[/\u{200B}code]
',
+ "italic (self closing br
)underlined \nwith umlauts: ÄÜÖäöüßnested italic text text",
+ '
',
+ "underlined \nwith umlauts: ÄÜÖäöüßnested italic text",
+ 'nested italic',
+ ];
+ //Offsets and tags are not checked here, only the matched texts.
+ $full_texts = array_column($tag_info, 'full');
+ //$this->assertEquals('', print_r($tag_info, true)); //for output of $tag_info
+ $this->assertEquals($expected, $full_texts);
+ }
+
+ /** Data sets for test_tag_aware_substr(): each entry is
+ * [offset, first 40 characters expected from tag_aware_substr()].
+ * NOTE(review): the html-tags of the expected strings appear to be lost - confirm against the original file.
+ */
+ public function tag_aware_substr_data_provider() {
+ return array (
+ [ 0, 'url1 -> B'], //whole text
+ [ 11, 'BBCode-Url w'], //Before a-tag
+ [ 12, 'BCode-Url wi'], //a-tag would be cut
+ [ 30, 'tbold'], //1 before end of a-tag
+ [ 31, 'bold italic (self closing br
'], //would lead to empty a-tag
+ [ 33, 'bold italic (self closing br
'], //inside ending a-tag
+ [ 35, 'bold italic (self closing br
'], //just before B-tag
+ [129, 'ßnested italic text '], //cut the starting i
+ [130, 'nested italic text t'], //exactly after starting i
+ [131, 'ested italic text te'], //one character after starting i
+ );
+ }
+
+ /** Cut the fixture text at the given offset and compare the first 40
+ * characters of the result with the expectation of the data set.
+ * NOTE(review): the start of the heredoc below looks damaged (markup lost) - confirm against the original file.
+ * @dataProvider tag_aware_substr_data_provider */
+ public function test_tag_aware_substr($offset, $expected_text40)
+ {
+ $input = <<BBCode-Url with text
+bold italic (self closing br
)underlined with umlauts: ÄÜÖäöüßnested italic text text text
+[code]A piece of code[/\u{200B}code]
+EOD;
+ //Offsets of the data sets refer to the text without line breaks.
+ $input = str_replace("\n", '', $input);
+ $text = $this->formatters->tag_aware_substr($input, $offset);
+ $this->assertEquals($expected_text40, mb_substr($text,0,40));
+ }
+
+ /** For every possible offset, the result of tag_aware_substr() must not be
+ * longer than mb_strlen($input) - $offset, and at most 32 characters shorter.
+ * NOTE(review): the start of the heredoc below looks damaged (markup lost) - confirm against the original file.
+ */
+ public function test_tag_aware_substr_len()
+ {
+ $input = <<BBCode-Url with text
+bold italic (self closing br
)underlined with umlauts: ÄÜÖäöüßnested italic text text text
+[code]A piece of code[/\u{200B}code]
+EOD;
+ $input = str_replace("\n", '', $input);
+ for ($i = 0; $i < mb_strlen($input); $i++)
+ {
+ $text = $this->formatters->tag_aware_substr($input, $i);
+ $exp_len = mb_strlen($input) - $i;
+ $len = mb_strlen($text);
+ $this->assertLessThanOrEqual($exp_len, $len);
+ //Putting the a-tag in front of the text, would lead to
+ //an empty end-a-tag. Thus the offset is moved even
+ //behind the end-a-tag, which sums up to 31 chars.
+ //NOTE(review): the assertion below tolerates 32 characters, not 31 - confirm which is intended.
+ $this->assertGreaterThanOrEqual($exp_len, $len+32);
+ //Collected for the optional debug output below only.
+ $print[] = "$i: $exp_len -> $len";
+ }
+ //For output of length-info:
+ //$this->assertEquals('', implode("\n", $print));
+ }
+
}