Skip to content

Commit

Permalink
Formatting input and output completely refactored.
Browse files Browse the repository at this point in the history
  • Loading branch information
D-MBLD committed Mar 25, 2023
1 parent d89e7e4 commit a3e8edb
Show file tree
Hide file tree
Showing 5 changed files with 482 additions and 44 deletions.
31 changes: 21 additions & 10 deletions core/commands.php
Expand Up @@ -21,6 +21,9 @@ class commands
/** @var \eb\telegram\core\forum_api */
private $forum_api;

/** @var \eb\telegram\core\formatters */
private $formatters;

/**
* Constructor
*
Expand All @@ -36,6 +39,7 @@ public function __construct(\phpbb\config\config $config,
$this->config = $config;
$this->language = $language;
$this->forum_api = $forum_api;
$this->formatters = new \eb\telegram\core\formatters(/*$config, $language*/);
}

public function onButtonOutdated($command)
Expand Down Expand Up @@ -283,7 +287,7 @@ public function onShowTopic($command)
$topics = $this->forum_api->selectForumTopics($user_id, $command['forum_id']);
$topic_id = $topics[$topic_index]['topic_id'] ?? 0;
}
//Permission check needed
//Permission check needed
$posts = $this->forum_api->selectTopicPosts($user_id, $topic_id);
// Page refers to the page, of the topics-list.
$page = $command['page'];
Expand Down Expand Up @@ -312,15 +316,18 @@ public function onShowTopic($command)
// "Title: <b>$title</b>\n";
$text .= $this->language->lang('EBT_TOPIC_TITLE', $title, $viewtopic_url . $topic_id) . PHP_EOL;
$text .= $not_approved;
$text .= $post['text'];
//Add an invisible marker, where the text could be split, if the
//full post is too long, to be displayed.
$text .= "\u{200B}\u{200B}";
$text .= $this->formatters->format_post_for_telegram($post['text']);
$readonly = !$post['reply'];
$first = false;
} else
{
// "<b>$time:</b> Reply from <b>$user</b>\n";
$text .= $this->language->lang('EBT_REPLY_AT_BY', $time, $user) . PHP_EOL;
$text .= $not_approved;
$text .= $post['text'];
$text .= $this->formatters->format_post_for_telegram($post['text']);
}
$text .= PHP_EOL . '<u>___________________________________</u>' . PHP_EOL;
}
Expand Down Expand Up @@ -395,14 +402,14 @@ public function onSaveNewTopic($command)
return $this->onShowPermissions($command);
}
$title = $command['title'];
$content = $this->format_text($command['text'], $command['entities'] ?? array());
$content = $this->formatters->format_input($command['text'], $command['entities'] ?? array());
$user = $command['user'];
$forum_id = $command['forum_id'];
$saved = $this->forum_api->insertNewPost(true, $forum_id, $title, $content, $user);
$topic_id = $this->forum_api->insertNewPost(true, $forum_id, $title, $content, $user);
//Reset chat_state to topic-display (for back commands)
$this->forum_api->store_telegram_chat_state($command['chat_id'], 0, 'T');

if ($saved)
if ($topic_id)
{
// The following post was saved.
$text = $this->language->lang('EBT_TOPIC_SAVED') . PHP_EOL;
Expand All @@ -411,7 +418,9 @@ public function onSaveNewTopic($command)
$text .= $this->language->lang('EBT_TOPIC_TITLE', $title, '') . PHP_EOL;
$text .= $content;
$buttons = array($this->language->lang('EBT_BACK') => 'allForumTopics');
return [$text, $buttons];

$command['topic_id'] = $topic_id;
return $this->onShowTopic($command);
} else
{
return $this->errorOnSave();
Expand All @@ -426,16 +435,18 @@ public function onSaveNewPost(&$command)
{
return $this->onShowPermissions($command);
}
$command['text'] = $this->format_text($command['text'], $command['entities'] ?? array());
$command['text'] = $this->formatters->format_input($command['text'], $command['entities'] ?? array());
$saved = $this->forum_api->insertNewPost(false, $command['forum_id'], $command['topic_id'], $command['text'], $command['user']);
//Reset chat_state to topic-display (for back commands)
$this->forum_api->store_telegram_chat_state($command['chat_id'], 0, 'T');
if ($saved === true)
if ($saved)
{
return $this->onShowTopic($command);
} else
{
$command['admin_info'] = $saved;
$topic_id = $command['topic_id'];
$username = $command['user']['username'];
$command['admin_info'] = "New post for topic $topic_id could not be saved, by user $username";
return $this->errorOnSave();
}
}
Expand Down
275 changes: 275 additions & 0 deletions core/formatters.php
@@ -0,0 +1,275 @@
<?php
/**
*
* Telegram Bridge. An extension for the phpBB Forum Software package.
*
* @copyright (c) 2022, Edgar Bolender
* @license GNU General Public License, version 2 (GPL-2.0)
*
*/

namespace eb\telegram\core;

/**
 * Text conversion helpers between phpBB posts and Telegram messages.
 *
 * - format_post_for_telegram(): stored forum post -> HTML subset sent to Telegram.
 * - tag_aware_substr() / adapt_cut_point() / parse_tags(): cut such HTML at an
 *   offset without leaving unbalanced tags behind.
 * - format_input(): Telegram text plus formatting entities -> BBCode text.
 */
class formatters
{
	/**
	 * Transform the formatting information of a stored phpBB post into
	 * markup that Telegram accepts.
	 *
	 * @param string $text Post text as stored by the forum (BBCodes wrapped
	 *                     in <s>...</s> / <e>...</e> marker tags).
	 * @return string Text reduced to the allowed tags, with URL/IMG tags turned
	 *                into anchors, relative links made absolute and forward
	 *                slashes outside of hrefs followed by a zero width space.
	 */
	public function format_post_for_telegram($text)
	{
		// The telegram bot only allows a predefined set of HTML tags.
		// The forum posts however surround each opening BBCode with an <s> tag
		// (and each closing BBCode with an <e> tag).

		// First remove these marker tags together with their BBCode for the
		// BBCodes listed in the pattern; only the enclosed content is kept.
		// The attribute part must not run past the closing bracket, otherwise
		// two BBCodes on the same line are merged into one match and the text
		// between them is lost. The s flag lets the content span line breaks.
		$bbcode_pattern = '~<s>\[((i|b|u|url|email)(?:[=\s][^\]]*)?)\]</s>(.*?)<e>\[/\\2]</e>~s';
		do
		{
			// One replacement per pass, repeated until nothing matches any more,
			// so that nested BBCodes are unwrapped as well.
			$text = preg_replace($bbcode_pattern, '$3', $text, 1, $count);
		} while ($count > 0);

		$allowed_tags = ['url', 'img', 'b', 'strong', 'i', 'em', 'u', 'ins', 'strike', 'del', 'a', 'code', 'pre'];

		// Now remove all tags which are not in the list above.
		// This would not be necessary for "normal" text. But when the forum sends
		// notifications, a lot of additional tags surround the text.
		// strip_tags() only accepts an array of tag names from PHP 7.4 on,
		// therefore the list is passed in its string form.
		$text = strip_tags($text, '<' . implode('><', $allowed_tags) . '>');

		// Special handling of the url tag: Replace <URL url=...> by <a href=...>
		$url_pattern = '~<URL url="([^"]*)">(.*?)</URL>~is';
		$text = preg_replace($url_pattern, '<a href="$1">$2</a>', $text);

		// Special handling of the img tag: link to the image and show an
		// <<IMAGE>> placeholder as the anchor text.
		$img_pattern = '~<IMG src="([^"]*)">\[img](.*?)\[/img]</IMG>~is';
		$text = preg_replace($img_pattern, '<a href="$1">&lt;&lt;IMAGE&gt;&gt;</a>', $text);

		// Add the server address to links starting with a single slash.
		// Protocol-relative links (//host/path) are already complete.
		// NOTE(review): generate_board_url() is not defined in this file; the
		// argument presumably selects the URL without/with the script path.
		$rel_url_pattern = '~<a href="(/(?!/)[^"]*)">(.*?)</a>~is';
		$replace = '<a href="' . generate_board_url(true) . '$1">$2</a>';
		$text = preg_replace($rel_url_pattern, $replace, $text);

		// Add the board address to the remaining relative links. Links which
		// carry any scheme (https:, ftp:, mailto:, tg:, ...) or start with //
		// are absolute and must stay untouched.
		$rel_url_pattern = '~<a href="((?![a-z][a-z0-9+.\-]*:|//)[^"]*)">(.*?)</a>~is';
		$replace = '<a href="' . generate_board_url(false) . '/$1">$2</a>';
		$text = preg_replace($rel_url_pattern, $replace, $text);

		// Add a zero width space (ZWSP) behind forward slashes.
		// By that, telegram does not treat the forward slash as the beginning
		// of a command. Slashes directly behind "<" (closing tags) and slashes
		// followed by another slash are excluded. Look-arounds are used so that
		// the neighbouring characters are not consumed and slashes which are
		// only one character apart ("a/b/c") are all handled.
		$text = preg_replace('~(?<=[^<])/(?=[^/])~', "/\u{200B}", $text);

		// Revert this for the hrefs of anchors, the ZWSP would break the link.
		do
		{
			$text = preg_replace("~(<a href=\"[^\"]*/)\u{200B}([^\"]*\">)~", '$1$2', $text, 1, $count);
		} while ($count > 0);

		return $text;
	}

	/**
	 * Implements the substring function (without length parameter)
	 * such that html tags are still correctly opened and closed.
	 *
	 * Cuts the beginning of a text at the given offset.
	 * If the offset happens to lie inside a tagged area, the cut off start
	 * tags are added again to the beginning of the text.
	 * If the offset would cut a start tag or an end tag into pieces, the
	 * offset is moved behind the closing ">".
	 * Adding the start tags increases the text length. To ensure that the
	 * result is not longer than expected, the cut point is increased until
	 * the total length is less than or equal to mb_strlen($tagged_text) - $offset.
	 *
	 * @param string $tagged_text Text containing html tags.
	 * @param int    $offset      Number of characters to cut from the beginning.
	 * @return string The remaining text, prefixed with the re-opened start tags.
	 */
	public function tag_aware_substr($tagged_text, $offset)
	{
		$tags = $this->parse_tags($tagged_text);
		$prefix = '';
		$cut_point = $offset - 1;
		do
		{
			$cut_point++;
			// May move $cut_point further to the right (passed by reference).
			$prefix = $this->adapt_cut_point($tags, $cut_point);
		} while ($cut_point - mb_strlen($prefix) < $offset);

		return $prefix . mb_substr($tagged_text, $cut_point);
	}

	/**
	 * For a text which is to be cut, adapt the cut point such that it does
	 * not cut a start or end tag, and return the start tags which must be
	 * added as prefix because the cut text still contains their end tags.
	 *
	 * @param array $tags   Tag list in the form returned by parse_tags().
	 * @param int   $offset Cut point; updated in place when it had to be moved.
	 * @return string Concatenated start tags, in the order they occur in the text.
	 */
	public function adapt_cut_point($tags, &$offset)
	{
		// Move the offset such that no tag is split. Every move can place the
		// offset onto the end tag of an enclosing element, therefore the check
		// is repeated until nothing moves any more. Each move strictly
		// increases the offset, so the loop terminates.
		do
		{
			$moved = false;
			foreach ($tags as $tag)
			{
				if ($offset <= $tag['full_s'] || $offset >= $tag['full_e'])
				{
					continue; // We are outside the enclosing tags.
				}
				$start_tag_end = $tag['full_s'] + mb_strlen($tag['s_tag']);
				if ($offset < $start_tag_end)
				{
					// Start tag would be cut. Move offset behind the start tag.
					$offset = $start_tag_end;
					$moved = true;
					break;
				}
				if ($offset >= $tag['full_e'] - mb_strlen($tag['e_tag']))
				{
					// End tag would be cut, move the offset behind the end tag.
					// A pure end tag without previous content does not make
					// sense either, therefore it is skipped too (>= above).
					$offset = $tag['full_e'];
					$moved = true;
					break;
				}
			}
		} while ($moved);

		// Collect the start tags which must be added to the beginning, such
		// that there is no end tag with a missing start tag.
		$start_tags = array();
		foreach ($tags as $tag)
		{
			if ($offset > $tag['full_s'] && $offset < $tag['full_e'])
			{
				// Key by start position to keep the original order.
				$start_tags[$tag['full_s']] = $tag['s_tag'];
			}
		}
		ksort($start_tags);

		return implode('', $start_tags);
	}

	/**
	 * Find the start and end of text enclosed in html tags.
	 *
	 * The result is an array containing all tagged texts (also if nested)
	 * in the following form:
	 * array(
	 *   array('full'   => full tag enclosed text,
	 *         'full_s' => character offset where the full text starts,
	 *         'full_e' => character offset directly behind the full text,
	 *         's_tag'  => complete start tag (including attributes),
	 *         'e_tag'  => complete end tag
	 * ))
	 * In case of self closing tags, e_tag is empty.
	 *
	 * @param string $tagged_text Text containing html tags.
	 * @return array List of tag descriptions, enclosing tags before nested ones.
	 */
	public function parse_tags($tagged_text)
	{
		$tag_pattern = "/<([\w]+)([^>]*?)(?:([\s]*\/>)|(?:(>)(?:(?:(?:[^<]*?|<\!\-\-.*?\-\->)|(?R))*)(<\/\\1[\s]*>)))/xsmu";

		// The list doubles as work queue: every found element is appended and
		// later scanned itself for nested tags. The first entry is the whole
		// text; it gets one leading blank, because the first character of
		// every entry is skipped (see below).
		$result = array(array('full' => ' ' . $tagged_text, 'full_s' => -1));
		for ($i = 0; $i < count($result); $i++)
		{
			$base = $result[$i]['full_s'];
			// Skip the first character (the "<" of the element itself), so
			// that only tags nested inside the element are matched.
			$inner = mb_substr($result[$i]['full'], 1);
			$t_count = preg_match_all($tag_pattern, $inner, $matches, PREG_OFFSET_CAPTURE);
			if (!$t_count)
			{
				continue;
			}
			foreach ($matches[0] as $j => $match)
			{
				// preg reports byte offsets, convert them to character offsets.
				$mb_offset = mb_strlen(substr($inner, 0, $match[1]));
				$result[] = array(
					'full'   => $match[0],
					'full_s' => $mb_offset + 1 + $base,
					'full_e' => $mb_offset + 1 + mb_strlen($match[0]) + $base,
					's_tag'  => '<' . $matches[1][$j][0] . $matches[2][$j][0] . $matches[3][$j][0] . $matches[4][$j][0],
					'e_tag'  => $matches[5][$j][0],
				);
			}
		}

		// Remove the first entry, which is the full text.
		return array_slice($result, 1);
	}

	/**
	 * Format the telegram input by adding BBCodes according to the formatting
	 * information, which telegram sends as so called entities.
	 *
	 * @param string $text     Plain message text received from telegram.
	 * @param array  $entities Entity objects with the properties type, offset
	 *                         and length. Offset and length are counted in
	 *                         UTF-16 code units (see the Telegram Bot API).
	 * @return string The text with BBCodes inserted.
	 */
	public function format_input($text, $entities)
	{
		// Split the text into an array at every point where a formatting
		// starts or ends. Therefore all split points are collected first and
		// duplicates are removed.
		$split_points = array(0);
		foreach ($entities as $entity)
		{
			$split_points[] = $entity->offset;
			$split_points[] = $entity->offset + $entity->length;
		}
		$split_points = array_unique($split_points);
		rsort($split_points);

		// The offsets count UTF-16 code units, not characters. The text is
		// therefore cut in its UTF-16 form (2 bytes per unit); otherwise every
		// emoji in front of an entity would shift the BBCodes.
		$remaining = mb_convert_encoding((string) $text, 'UTF-16LE', 'UTF-8');
		$chunks = array();
		foreach ($split_points as $point)
		{
			// Cut from the end towards the beginning (points are descending).
			$byte_pos = 2 * $point;
			$chunks[$point] = mb_convert_encoding((string) substr($remaining, $byte_pos), 'UTF-8', 'UTF-16LE');
			$remaining = (string) substr($remaining, 0, $byte_pos);
		}
		ksort($chunks);

		// Sort by end of formatting, such that in case of nested formats the
		// opening tag of the format that gets closed last is placed first.
		// Formats ending at the same point are ordered by descending start, so
		// that the one opened first is also the one closed last.
		usort($entities, function ($a, $b)
		{
			return (($a->offset + $a->length) <=> ($b->offset + $b->length))
				?: ($b->offset <=> $a->offset);
		});

		foreach ($entities as $entity)
		{
			$bbcode = $this->get_bbcode($entity->type);
			if (!$bbcode)
			{
				continue;
			}
			$chunks[$entity->offset] = $bbcode . $chunks[$entity->offset];
		}

		for ($i = count($entities) - 1; $i >= 0; $i--)
		{
			$entity = $entities[$i];
			$bbcode = $this->get_bbcode($entity->type, false);
			if (!$bbcode)
			{
				continue;
			}
			$bbcode_start = $this->get_bbcode($entity->type);
			$end = $entity->offset + $entity->length;
			if (strpos($chunks[$end], $bbcode_start) === 0)
			{
				// An ending tag immediately followed by the same starting tag:
				// drop both, the formatting simply continues.
				$chunks[$end] = substr($chunks[$end], strlen($bbcode_start));
			}
			else
			{
				$chunks[$end] = $bbcode . $chunks[$end];
			}
		}

		// Remove the zero width space behind slashes, which may have been
		// included when the user copied a part of a post where it was added.
		$text = implode('', $chunks);
		$text = str_replace("/\u{200B}", "/", $text);

		return $text;
	}

	/**
	 * Map a telegram entity type to the markup inserted into the post.
	 *
	 * @param string $format_type Entity type as sent by telegram.
	 * @param bool   $start       true for the opening, false for the closing markup.
	 * @return string|false The markup, an empty string when the type needs
	 *                      none, or false for unsupported types.
	 */
	private function get_bbcode($format_type, $start = true)
	{
		switch ($format_type)
		{
			case 'bold': return $start ? '[b]' : '[/b]';
			case 'italic': return $start ? '[i]' : '[/i]';
			case 'underline': return $start ? '[u]' : '[/u]';
			case 'code': return $start ? '[code]' : '[/code]';
			case 'pre': return $start ? '[code]' : '[/code]';
			// NOTE(review): html tag instead of a BBCode - confirm this is intended.
			case 'strikethrough': return $start ? '<del>' : '</del>';
			case 'url': return ''; //No need for BBCode
			default: return false;
		}
	}

}
3 changes: 2 additions & 1 deletion core/forum_api.php
Expand Up @@ -520,7 +520,8 @@ public function insertNewPost($new_topic, $forum_id, $topic_id_or_title, $text,
{
$user->data[$prop] = $userOrigData[$prop];
}
return $url ? true : false;
//For new topics, topic_id was set in submit_post.
return $url ? $data['topic_id'] : false;
}

private function print_formatted($obj)
Expand Down

0 comments on commit a3e8edb

Please sign in to comment.