From 05dddc42ff9bc3904a6a3121dce592cab7185c3a Mon Sep 17 00:00:00 2001 From: Ben Wiederhake Date: Thu, 7 Mar 2024 22:34:15 +0100 Subject: [PATCH 1/3] Implement and test byte-splitting helper function --- bridge/helper/helper.go | 27 ++++++++++ bridge/helper/helper_test.go | 102 +++++++++++++++++++++++++++++++++++ 2 files changed, 129 insertions(+) diff --git a/bridge/helper/helper.go b/bridge/helper/helper.go index 0208dff1bd..db3b54a85c 100644 --- a/bridge/helper/helper.go +++ b/bridge/helper/helper.go @@ -219,6 +219,33 @@ func ClipMessage(text string, length int, clippingMessage string) string { return text } +func ClipOrSplitMessage(text string, length int, clippingMessage string, splitMax int) []string { + var msgParts []string + var remainingText = text + // Invariant of this splitting loop: No text is lost (msgParts+remainingText is the original text), + // and all parts is guaranteed to satisfy the length requirement. + for len(msgParts) < splitMax - 1 && len(remainingText) > length { + // Decision: The text needs to be split (again). + var chunk string + var wasted = 0 + // The longest UTF-8 encoding of a valid rune is 4 bytes (0xF4 0x8F 0xBF 0xBF, encoding U+10FFFF), + // so we should never need to waste 4 or more bytes at a time. + for wasted < 4 && wasted < length { + chunk = remainingText[:length - wasted] + if r, _ := utf8.DecodeLastRuneInString(chunk); r == utf8.RuneError { + wasted += 1 + } else { + break + } + } + // Note: At this point, "chunk" might still be invalid, if "text" is very broken. 
+ msgParts = append(msgParts, chunk) + remainingText = remainingText[len(chunk):] + } + msgParts = append(msgParts, ClipMessage(remainingText, length, clippingMessage)) + return msgParts +} + // ParseMarkdown takes in an input string as markdown and parses it to html func ParseMarkdown(input string) string { extensions := parser.HardLineBreak | parser.NoIntraEmphasis | parser.FencedCode diff --git a/bridge/helper/helper_test.go b/bridge/helper/helper_test.go index 76e548e487..d486d64766 100644 --- a/bridge/helper/helper_test.go +++ b/bridge/helper/helper_test.go @@ -125,3 +125,105 @@ func TestConvertWebPToPNG(t *testing.T) { t.Fail() } } + +var clippingOrSplittingTestCases = map[string]struct { + inputText string + clipSplitLength int + clippingMessage string + splitMax int + expectedOutput []string +}{ + "Short single-line message, split 3": { + inputText: "short", + clipSplitLength: 20, + clippingMessage: "?!?!", + splitMax: 3, + expectedOutput: []string{"short"}, + }, + "Short single-line message, split 1": { + inputText: "short", + clipSplitLength: 20, + clippingMessage: "?!?!", + splitMax: 1, + expectedOutput: []string{"short"}, + }, + "Short single-line message, split 0": { + // Mainly check that we don't crash. 
// clippingOrSplittingTestCases is the table driving TestClipOrSplitMessage.
// Each entry names a scenario and gives the arguments for ClipOrSplitMessage
// (text, byte limit, clipping message, maximum number of parts) together with
// the exact parts that are expected back.
var clippingOrSplittingTestCases = map[string]struct {
	inputText       string
	clipSplitLength int
	clippingMessage string
	splitMax        int
	expectedOutput  []string
}{
	"Short single-line message, split 3": {
		inputText:       "short",
		clipSplitLength: 20,
		clippingMessage: "?!?!",
		splitMax:        3,
		expectedOutput:  []string{"short"},
	},
	"Short single-line message, split 1": {
		inputText:       "short",
		clipSplitLength: 20,
		clippingMessage: "?!?!",
		splitMax:        1,
		expectedOutput:  []string{"short"},
	},
	"Short single-line message, split 0": {
		// Mainly check that we don't crash.
		inputText:       "short",
		clipSplitLength: 20,
		clippingMessage: "?!?!",
		splitMax:        0,
		expectedOutput:  []string{"short"},
	},
	"Long single-line message, noclip": {
		inputText:       "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.",
		clipSplitLength: 50,
		clippingMessage: "?!?!",
		splitMax:        10,
		expectedOutput: []string{
			"Lorem ipsum dolor sit amet, consectetur adipiscing",
			" elit, sed do eiusmod tempor incididunt ut labore ",
			"et dolore magna aliqua.",
		},
	},
	"Long single-line message, noclip tight": {
		inputText:       "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.",
		clipSplitLength: 50,
		clippingMessage: "?!?!",
		splitMax:        3,
		expectedOutput: []string{
			"Lorem ipsum dolor sit amet, consectetur adipiscing",
			" elit, sed do eiusmod tempor incididunt ut labore ",
			"et dolore magna aliqua.",
		},
	},
	"Long single-line message, clip custom": {
		inputText:       "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.",
		clipSplitLength: 50,
		clippingMessage: "?!?!",
		splitMax:        2,
		expectedOutput: []string{
			"Lorem ipsum dolor sit amet, consectetur adipiscing",
			" elit, sed do eiusmod tempor incididunt ut lab?!?!",
		},
	},
	"Long single-line message, clip built-in": {
		inputText:       "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.",
		clipSplitLength: 50,
		clippingMessage: "",
		splitMax:        2,
		expectedOutput: []string{
			"Lorem ipsum dolor sit amet, consectetur adipiscing",
			// The empty clippingMessage selects the built-in marker; together
			// with it the part is exactly 50 bytes long.
			" elit, sed do eiusmod tempor inc <clipped message>",
		},
	},
	"Short multi-line message": {
		inputText:       "I\ncan't\nget\nno\nsatisfaction!",
		clipSplitLength: 50,
		clippingMessage: "",
		splitMax:        2,
		expectedOutput:  []string{"I\ncan't\nget\nno\nsatisfaction!"},
	},
	"Long message containing UTF-8 multi-byte runes": {
		// The commas are FULLWIDTH COMMA (U+FF0C, 3 bytes in UTF-8). They are
		// spelled as escapes so the byte counts below survive tooling that
		// normalizes fullwidth punctuation to ASCII.
		inputText:       "人人生而自由\uFF0C在尊嚴和權利上一律平等。 他們都具有理性和良知\uFF0C應該以兄弟情誼的精神對待彼此。",
		clipSplitLength: 50,
		clippingMessage: "",
		splitMax:        10,
		expectedOutput: []string{
			"人人生而自由\uFF0C在尊嚴和權利上一律", // Note: only 48 bytes!
			"平等。 他們都具有理性和良知\uFF0C應該", // Note: only 49 bytes!
			"以兄弟情誼的精神對待彼此。",
		},
	},
}
containing UTF-8 multi-byte runes": { + inputText: "人人生而自由,在尊嚴和權利上一律平等。 他們都具有理性和良知,應該以兄弟情誼的精神對待彼此。", + clipSplitLength: 50, + clippingMessage: "", + splitMax: 10, + expectedOutput: []string{ + "人人生而自由,在尊嚴和權利上一律", // Note: only 48 bytes! + "平等。 他們都具有理性和良知,應該", // Note: only 49 bytes! + "以兄弟情誼的精神對待彼此。", + }, + }, +} + +func TestClipOrSplitMessage(t *testing.T) { + for testname, testcase := range clippingOrSplittingTestCases { + actualOutput := ClipOrSplitMessage(testcase.inputText, testcase.clipSplitLength, testcase.clippingMessage, testcase.splitMax) + assert.Equalf(t, testcase.expectedOutput, actualOutput, "'%s' testcase should give expected lines with clipping+splitting.", testname) + for _, splitLine := range testcase.expectedOutput { + byteLength := len([]byte(splitLine)) + assert.True(t, byteLength <= testcase.clipSplitLength, "Splitted line '%s' of testcase '%s' should not exceed the maximum byte-length (%d vs. %d).", splitLine, testname, testcase.clipSplitLength, byteLength) + } + } +} From 8a646c5ad5761d50bbb123b2caa07d853b67cf44 Mon Sep 17 00:00:00 2001 From: Ben Wiederhake Date: Thu, 7 Mar 2024 23:22:29 +0100 Subject: [PATCH 2/3] Implement discord botuser message splitting --- bridge/config/config.go | 1 + bridge/discord/discord.go | 59 ++++++++++++++++++++++++++------------- matterbridge.toml.sample | 11 ++++++-- 3 files changed, 49 insertions(+), 22 deletions(-) diff --git a/bridge/config/config.go b/bridge/config/config.go index 18c6092082..75792ed0c3 100644 --- a/bridge/config/config.go +++ b/bridge/config/config.go @@ -121,6 +121,7 @@ type Protocol struct { MessageLength int // IRC, max length of a message allowed MessageQueue int // IRC, size of message queue for flood control MessageSplit bool // IRC, split long messages with newlines on MessageLength instead of clipping + MessageSplitMaxCount int // discord, split long messages into at most this many messages instead of clipping (MessageLength=1950 cannot be configured) Muc string // xmpp MxID 
string // matrix Name string // all protocols diff --git a/bridge/discord/discord.go b/bridge/discord/discord.go index 51dbe6bc71..5b10226a52 100644 --- a/bridge/discord/discord.go +++ b/bridge/discord/discord.go @@ -316,6 +316,7 @@ func (b *Bdiscord) handleEventBotUser(msg *config.Message, channelID string) (st // Upload a file if it exists if msg.Extra != nil { for _, rmsg := range helper.HandleExtra(msg, b.General) { + // TODO: Use ClipOrSplitMessage rmsg.Text = helper.ClipMessage(rmsg.Text, MessageLength, b.GetString("MessageClipped")) if _, err := b.c.ChannelMessageSend(channelID, rmsg.Username+rmsg.Text); err != nil { b.Log.Errorf("Could not send message %#v: %s", rmsg, err) @@ -327,35 +328,53 @@ func (b *Bdiscord) handleEventBotUser(msg *config.Message, channelID string) (st } } - msg.Text = helper.ClipMessage(msg.Text, MessageLength, b.GetString("MessageClipped")) - msg.Text = b.replaceUserMentions(msg.Text) - // Edit message if msg.ID != "" { - _, err := b.c.ChannelMessageEdit(channelID, msg.ID, msg.Username+msg.Text) - return msg.ID, err + // Exploit that a discord message ID is actually just a large number, and we encode a list of IDs by separating them with ";". + var msgIds = strings.Split(msg.ID, ";") + msgParts := helper.ClipOrSplitMessage(b.replaceUserMentions(msg.Text), MessageLength, b.GetString("MessageClipped"), len(msgIds)) + for len(msgParts) < len(msgIds) { + msgParts = append(msgParts, "((obsoleted by edit))") + } + for i := range msgParts { + // In case of split-messages where some parts remain the same (i.e. only a typo-fix in a huge message), this causes some noop-updates. 
+ // TODO: Optimize away noop-updates of un-edited messages + // TODO: Use RemoteNickFormat instead of this broken concatenation + _, err := b.c.ChannelMessageEdit(channelID, msgIds[i], msg.Username+msgParts[i]) + if err != nil { + return "", err + } + } + return msg.ID, nil } - m := discordgo.MessageSend{ - Content: msg.Username + msg.Text, - AllowedMentions: b.getAllowedMentions(), - } + msgParts := helper.ClipOrSplitMessage(b.replaceUserMentions(msg.Text), MessageLength, b.GetString("MessageClipped"), b.GetInt("MessageSplitMaxCount")) + var msgIds = []string{} - if msg.ParentValid() { - m.Reference = &discordgo.MessageReference{ - MessageID: msg.ParentID, - ChannelID: channelID, - GuildID: b.guildID, + for _, msgPart := range msgParts { + m := discordgo.MessageSend{ + Content: msg.Username + msgPart, + AllowedMentions: b.getAllowedMentions(), } - } - // Post normal message - res, err := b.c.ChannelMessageSendComplex(channelID, &m) - if err != nil { - return "", err + if msg.ParentValid() { + m.Reference = &discordgo.MessageReference{ + MessageID: msg.ParentID, + ChannelID: channelID, + GuildID: b.guildID, + } + } + + // Post normal message + res, err := b.c.ChannelMessageSendComplex(channelID, &m) + if err != nil { + return "", err + } + msgIds = append(msgIds, res.ID) } - return res.ID, nil + // Exploit that a discord message ID is actually just a large number, so we encode a list of IDs by separating them with ";". 
+ return strings.Join(msgIds, ";"), nil } // handleUploadFile handles native upload of files diff --git a/matterbridge.toml.sample b/matterbridge.toml.sample index 5932b269a3..a3c471f5e5 100644 --- a/matterbridge.toml.sample +++ b/matterbridge.toml.sample @@ -925,10 +925,17 @@ ShowTopicChange=false # Supported from the following bridges: slack SyncTopic=false -#Message to show when a message is too big -#Default "" +# Message to show when a message is too big +# Default "" MessageClipped="" +# Before clipping, try to split messages into at most this many parts. 0 is treated like 1. +# Be careful with large numbers, as this might cause flooding. +# Example: A maximum telegram message of 4096 bytes is received. This requires 3 Discord +# messages (each capped at a hardcoded 1950 bytes). +# Default 1 +MessageSplitMaxCount=3 + ################################################################### #telegram section ################################################################### From a91a14c67b8907639e1d395605b20d881450c4fc Mon Sep 17 00:00:00 2001 From: Ben Wiederhake Date: Fri, 8 Mar 2024 00:21:07 +0100 Subject: [PATCH 3/3] Implement discord webhooks message splitting --- bridge/discord/webhook.go | 143 ++++++++++++++++++++++---------------- 1 file changed, 84 insertions(+), 59 deletions(-) diff --git a/bridge/discord/webhook.go b/bridge/discord/webhook.go index b518ea6262..4e647b3856 100644 --- a/bridge/discord/webhook.go +++ b/bridge/discord/webhook.go @@ -2,6 +2,7 @@ package bdiscord import ( "bytes" + "strings" "github.com/42wim/matterbridge/bridge/config" "github.com/42wim/matterbridge/bridge/helper" @@ -42,13 +43,65 @@ func (b *Bdiscord) maybeGetLocalAvatar(msg *config.Message) string { return "" } +func (b *Bdiscord) webhookSendTextOnly(msg *config.Message, channelID string) (string, error) { + msgParts := helper.ClipOrSplitMessage(msg.Text, MessageLength, b.GetString("MessageClipped"), b.GetInt("MessageSplitMaxCount")) + var msgIds = []string{} + for _, 
msgPart := range msgParts { + res, err := b.transmitter.Send( + channelID, + &discordgo.WebhookParams{ + Content: msgPart, + Username: msg.Username, + AvatarURL: msg.Avatar, + AllowedMentions: b.getAllowedMentions(), + }, + ) + if err != nil { + return "", err + } else { + msgIds = append(msgIds, res.ID) + } + } + // Exploit that a discord message ID is actually just a large number, so we encode a list of IDs by separating them with ";". + return strings.Join(msgIds, ";"), nil +} + +func (b *Bdiscord) webhookSendFilesOnly(msg *config.Message, channelID string) error { + for _, f := range msg.Extra["file"] { + fi := f.(config.FileInfo) + file := discordgo.File{ + Name: fi.Name, + ContentType: "", + Reader: bytes.NewReader(*fi.Data), + } + content := fi.Comment + + // Cannot use the resulting ID for any edits anyway, so throw it away. + // This has to be re-enabled when we implement message deletion. + _, err := b.transmitter.Send( + channelID, + &discordgo.WebhookParams{ + Username: msg.Username, + AvatarURL: msg.Avatar, + Files: []*discordgo.File{&file}, + Content: content, + AllowedMentions: b.getAllowedMentions(), + }, + ) + if err != nil { + b.Log.Errorf("Could not send file %#v for message %#v: %s", file, msg, err) + return err + } + } + return nil +} + // webhookSend send one or more message via webhook, taking care of file // uploads (from slack, telegram or mattermost). // Returns messageID and error. -func (b *Bdiscord) webhookSend(msg *config.Message, channelID string) (*discordgo.Message, error) { +func (b *Bdiscord) webhookSend(msg *config.Message, channelID string) (string, error) { var ( - res *discordgo.Message - res2 *discordgo.Message + res string err error ) @@ -61,48 +114,11 @@ func (b *Bdiscord) webhookSend(msg *config.Message, channelID string) (*discordg // We can't send empty messages. 
if msg.Text != "" { - res, err = b.transmitter.Send( - channelID, - &discordgo.WebhookParams{ - Content: msg.Text, - Username: msg.Username, - AvatarURL: msg.Avatar, - AllowedMentions: b.getAllowedMentions(), - }, - ) - if err != nil { - b.Log.Errorf("Could not send text (%s) for message %#v: %s", msg.Text, msg, err) - } + res, err = b.webhookSendTextOnly(msg, channelID) } - if msg.Extra != nil { - for _, f := range msg.Extra["file"] { - fi := f.(config.FileInfo) - file := discordgo.File{ - Name: fi.Name, - ContentType: "", - Reader: bytes.NewReader(*fi.Data), - } - content := fi.Comment - - res2, err = b.transmitter.Send( - channelID, - &discordgo.WebhookParams{ - Username: msg.Username, - AvatarURL: msg.Avatar, - Files: []*discordgo.File{&file}, - Content: content, - AllowedMentions: b.getAllowedMentions(), - }, - ) - if err != nil { - b.Log.Errorf("Could not send file %#v for message %#v: %s", file, msg, err) - } - } - } - - if msg.Text == "" { - res = res2 + if err == nil && msg.Extra != nil { + err = b.webhookSendFilesOnly(msg, channelID) } return res, err @@ -120,35 +136,44 @@ func (b *Bdiscord) handleEventWebhook(msg *config.Message, channelID string) (st return "", nil } - msg.Text = helper.ClipMessage(msg.Text, MessageLength, b.GetString("MessageClipped")) - msg.Text = b.replaceUserMentions(msg.Text) // discord username must be [0..32] max if len(msg.Username) > 32 { msg.Username = msg.Username[0:32] } if msg.ID != "" { + // Exploit that a discord message ID is actually just a large number, and we encode a list of IDs by separating them with ";". 
+ var msgIds = strings.Split(msg.ID, ";") + msgParts := helper.ClipOrSplitMessage(b.replaceUserMentions(msg.Text), MessageLength, b.GetString("MessageClipped"), len(msgIds)) + for len(msgParts) < len(msgIds) { + msgParts = append(msgParts, "((obsoleted by edit))") + } b.Log.Debugf("Editing webhook message") - err := b.transmitter.Edit(channelID, msg.ID, &discordgo.WebhookParams{ - Content: msg.Text, - Username: msg.Username, - AllowedMentions: b.getAllowedMentions(), - }) - if err == nil { + var edit_err error = nil + for i := range msgParts { + // In case of split-messages where some parts remain the same (i.e. only a typo-fix in a huge message), this causes some noop-updates. + // TODO: Optimize away noop-updates of un-edited messages + edit_err = b.transmitter.Edit(channelID, msgIds[i], &discordgo.WebhookParams{ + Content: msgParts[i], + Username: msg.Username, + AllowedMentions: b.getAllowedMentions(), + }) + if edit_err != nil { + break + } + } + if edit_err == nil { return msg.ID, nil } - b.Log.Errorf("Could not edit webhook message: %s", err) + b.Log.Errorf("Could not edit webhook message(s): %s; sending as new message(s) instead", edit_err) } b.Log.Debugf("Processing webhook sending for message %#v", msg) - discordMsg, err := b.webhookSend(msg, channelID) + msg.Text = b.replaceUserMentions(msg.Text) + msgId, err := b.webhookSend(msg, channelID) if err != nil { - b.Log.Errorf("Could not broadcast via webhook for message %#v: %s", msg, err) + b.Log.Errorf("Could not broadcast via webhook for message %#v: %s", msgId, err) return "", err } - if discordMsg == nil { - return "", nil - } - - return discordMsg.ID, nil + return msgId, nil }