Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Persistent message map #1996

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
179 changes: 90 additions & 89 deletions bridge/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,95 +87,96 @@ type ChannelMember struct {
type ChannelMembers []ChannelMember

type Protocol struct {
AllowMention []string // discord
AuthCode string // steam
BindAddress string // mattermost, slack // DEPRECATED
Buffer int // api
Charset string // irc
ClientID string // msteams
ColorNicks bool // only irc for now
Debug bool // general
DebugLevel int // only for irc now
DisableWebPagePreview bool // telegram
EditSuffix string // mattermost, slack, discord, telegram, gitter
EditDisable bool // mattermost, slack, discord, telegram, gitter
HTMLDisable bool // matrix
IconURL string // mattermost, slack
IgnoreFailureOnStart bool // general
IgnoreNicks string // all protocols
IgnoreMessages string // all protocols
Jid string // xmpp
JoinDelay string // all protocols
Label string // all protocols
Login string // mattermost, matrix
LogFile string // general
MediaDownloadBlackList []string
MediaDownloadPath string // Basically MediaServerUpload, but instead of uploading it, just write it to a file on the same server.
MediaDownloadSize int // all protocols
MediaServerDownload string
MediaServerUpload string
MediaConvertTgs string // telegram
MediaConvertWebPToPNG bool // telegram
MessageDelay int // IRC, time in millisecond to wait between messages
MessageFormat string // telegram
MessageLength int // IRC, max length of a message allowed
MessageQueue int // IRC, size of message queue for flood control
MessageSplit bool // IRC, split long messages with newlines on MessageLength instead of clipping
Muc string // xmpp
MxID string // matrix
Name string // all protocols
Nick string // all protocols
NickFormatter string // mattermost, slack
NickServNick string // IRC
NickServUsername string // IRC
NickServPassword string // IRC
NicksPerRow int // mattermost, slack
NoHomeServerSuffix bool // matrix
NoSendJoinPart bool // all protocols
NoTLS bool // mattermost, xmpp
Password string // IRC,mattermost,XMPP,matrix
PrefixMessagesWithNick bool // mattemost, slack
PreserveThreading bool // slack
Protocol string // all protocols
QuoteDisable bool // telegram
QuoteFormat string // telegram
QuoteLengthLimit int // telegram
RealName string // IRC
RejoinDelay int // IRC
ReplaceMessages [][]string // all protocols
ReplaceNicks [][]string // all protocols
RemoteNickFormat string // all protocols
RunCommands []string // IRC
Server string // IRC,mattermost,XMPP,discord,matrix
SessionFile string // msteams,whatsapp
ShowJoinPart bool // all protocols
ShowTopicChange bool // slack
ShowUserTyping bool // slack
ShowEmbeds bool // discord
SkipTLSVerify bool // IRC, mattermost
SkipVersionCheck bool // mattermost
StripNick bool // all protocols
StripMarkdown bool // irc
SyncTopic bool // slack
TengoModifyMessage string // general
Team string // mattermost, keybase
TeamID string // msteams
TenantID string // msteams
Token string // gitter, slack, discord, api, matrix
Topic string // zulip
URL string // mattermost, slack // DEPRECATED
UseAPI bool // mattermost, slack
UseLocalAvatar []string // discord
UseSASL bool // IRC
UseTLS bool // IRC
UseDiscriminator bool // discord
UseFirstName bool // telegram
UseUserName bool // discord, matrix, mattermost
UseInsecureURL bool // telegram
UserName string // IRC
VerboseJoinPart bool // IRC
WebhookBindAddress string // mattermost, slack
WebhookURL string // mattermost, slack
AllowMention []string // discord
AuthCode string // steam
BindAddress string // mattermost, slack // DEPRECATED
Buffer int // api
Charset string // irc
ClientID string // msteams
ColorNicks bool // only irc for now
Debug bool // general
DebugLevel int // only for irc now
DisableWebPagePreview bool // telegram
EditDisable bool // mattermost, slack, discord, telegram, gitter
EditSuffix string // mattermost, slack, discord, telegram, gitter
HTMLDisable bool // matrix
IconURL string // mattermost, slack
IgnoreFailureOnStart bool // general
IgnoreMessages string // all protocols
IgnoreNicks string // all protocols
Jid string // xmpp
JoinDelay string // all protocols
Label string // all protocols
LogFile string // general
Login string // mattermost, matrix
MediaConvertTgs string // telegram
MediaConvertWebPToPNG bool // telegram
MediaDownloadBlackList []string // all protocols
MediaDownloadPath string // Basically MediaServerUpload, but instead of uploading it, just write it to a file on the same server.
MediaDownloadSize int // all protocols
MediaServerDownload string // all protocols
MediaServerUpload string // all protocols
MessageDelay int // IRC, time in millisecond to wait between messages
MessageFormat string // telegram
MessageLength int // IRC, max length of a message allowed
MessageQueue int // IRC, size of message queue for flood control
MessageSplit bool // IRC, split long messages with newlines on MessageLength instead of clipping
Muc string // xmpp
MxID string // matrix
Name string // all protocols
Nick string // all protocols
NickFormatter string // mattermost, slack
NickServNick string // IRC
NickServPassword string // IRC
NickServUsername string // IRC
NicksPerRow int // mattermost, slack
NoHomeServerSuffix bool // matrix
NoSendJoinPart bool // all protocols
NoTLS bool // mattermost, xmpp
Password string // IRC,mattermost,XMPP,matrix
PersistentMessageStorePath string // all protocols
PrefixMessagesWithNick bool // mattemost, slack
PreserveThreading bool // slack, discord, telegram, whatsapp
Protocol string // all protocols
QuoteDisable bool // telegram
QuoteFormat string // telegram
QuoteLengthLimit int // telegram
RealName string // IRC
RejoinDelay int // IRC
RemoteNickFormat string // all protocols
ReplaceMessages [][]string // all protocols
ReplaceNicks [][]string // all protocols
RunCommands []string // IRC
Server string // IRC,mattermost,XMPP,discord,matrix
SessionFile string // msteams,whatsapp
ShowEmbeds bool // discord
ShowJoinPart bool // all protocols
ShowTopicChange bool // slack
ShowUserTyping bool // slack
SkipTLSVerify bool // IRC, mattermost
SkipVersionCheck bool // mattermost
StripMarkdown bool // irc
StripNick bool // all protocols
SyncTopic bool // slack
Team string // mattermost, keybase
TeamID string // msteams
TenantID string // msteams
TengoModifyMessage string // general
Token string // gitter, slack, discord, api, matrix
Topic string // zulip
URL string // mattermost, slack // DEPRECATED
UseAPI bool // mattermost, slack
UseDiscriminator bool // discord
UseFirstName bool // telegram
UseInsecureURL bool // telegram
UseLocalAvatar []string // discord
UserName string // IRC
UseSASL bool // IRC
UseTLS bool // IRC
UseUserName bool // discord, matrix, mattermost
VerboseJoinPart bool // IRC
WebhookBindAddress string // mattermost, slack
WebhookURL string // mattermost, slack
}

type ChannelOptions struct {
Expand Down
72 changes: 66 additions & 6 deletions gateway/gateway.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"github.com/d5/tengo/v2/stdlib"
lru "github.com/hashicorp/golang-lru"
"github.com/kyokomi/emoji/v2"
"github.com/philippgille/gokv"
"github.com/sirupsen/logrus"
)

Expand All @@ -29,21 +30,24 @@ type Gateway struct {
Message chan config.Message
Name string
Messages *lru.Cache
MessageStore gokv.Store
CanonicalStore gokv.Store
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason it can't be in 1 file?

Copy link
Contributor Author

@yousefmansy1 yousefmansy1 Mar 11, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, in order to add this feature without significant refactor we are still following the whole concept of a "canonical" message.

We need two separate mappings one for message->canonical (CanonicalStore) and another for canonical->message[] (MessageStore)

This is all highly related to this another PR:
#1991 (comment)
#1991 (comment)

On the file system the directories look like this:
image

Each bridge gets its own subdirectory

Copy link
Owner

@42wim 42wim Mar 11, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, but it's a k/v store, why can't you just put it in 1 file and use a "canonical" and "messages" prefix for the keys?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mean we can I suppose, I don't really see how that's beneficial.

I feel the code is more readable to have distinct mappings for their functionality?
Plus, removes the risk of breaking old message stores by needing to change a hypothetical prefix.

What do you think?

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

seems like I didn't see that you said a subdirectory per bridge.

In my opinion it's much nicer to have everything in 1 file instead of a lot of files. So I'm even proposing to just have one database containing everything.

Copy link
Contributor Author

@yousefmansy1 yousefmansy1 Mar 12, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Particularly for the subdirectories per bridge, I prefer that solution as its more flexible.

Flexible in the manner, that each folder is as if its its own table.
If a user needs to rename a bridge and don't want to lose their historical data they can just rename the directory.

Additionally Badger db does not support buckets/tables for each keystore so sub directories is really the only way to do it.
for example what I did in another PR with bbolt:
https://github.com/yousefmansy1/matterbridge/blob/5353b32c1a21d2655f8e12e76f81628373acd6f5/gateway/persistent.go#L21-L34

The way I'd like to treat it closer to a set of tables in a DB rather than one big KV store where we dump things in and "filter" with prefixes.
For the non technical user they can just treat the root directory as the whole db and ignore it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By the way unless we're always closing and opening the DB/KV handler with each Read/Write we do need individual stores for each one as each KV store actually holds a lock over the store. No other stores will be able to open it until it closes it's connection.
Its probably a little stupid but we'll never run into this blocking if each gateway has its own store it keeps for itself.


logger *logrus.Entry
}

type BrMsgID struct {
br *bridge.Bridge
ID string
Protocol string
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why removing the br ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We are directly storing these values in our persistent store, storing a reference to br, would just be garbage data when we read it back.
Based on the current usage of br for this struct we are only using it to get the protocol and destination gateway name.

if dest.Protocol == id.br.Protocol && dest.Name == id.br.Name && channel.ID == id.ChannelID {
return strings.Replace(id.ID, dest.Protocol+" ", "", 1)
}

https://github.com/yousefmansy1/matterbridge/blob/c0f5d0c5f7a5c7c9edd871bea1cc07ebadbcdb1a/gateway/gateway.go#L313-L315

For this persistent feature we are removing this reference and storing the direct values.
Unfortunately, that does mean if you change the name of the gateway things will break.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, I understand not storing the br reference it self, but you can still reference br.Protocol and br.Name instead of creating those 2 variables? Or am I missing something about this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the in memory cache yes.
But for the persistent message map, if we restart the application all the memory references we have stored inside our value store will point whatever is the old br reference, which would be different for each run of the application.

I could be misunderstanding something about how references work in golang, correct me if I'm wrong.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if you actually store br it's a reference, but br.Protocol or br.Name is a string

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh ok I see. Yes, that would make more sense and would make its functionality more clear.
Will update.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually maybe I'm misunderstanding

type BrMsgID struct {
	Protocol  bridge.Bridge.Protocol
	DestName  bridge.Bridge.Name
	ChannelID string
	ID        string
}

causes build errors:

yousef@DESKTOP-YOUSEF $ go build -tags whatsappmulti -gcflags=all="-N -l"
# github.com/42wim/matterbridge/gateway
gateway/gateway.go:40:25: syntax error: unexpected . in struct type; possibly missing semicolon or newline or }

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BrMsgID stays the same with br *bridge.Bridge, where you need the protocol just use br.Protocol
I'm doing this on mobile so I could also be misunderstanding your issue

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I might also be misunderstanding what you mean, but this change is most important in this code snippet

https://github.com/yousefmansy1/matterbridge/blob/60219a39d1624df64de9421177b326bde269319f/gateway/persistent.go#L59-L74

When we get the []BrMsgID from the message store, anything that we read from those items cant come from any reference.

DestName string
ChannelID string
ID string
}

const apiProtocol = "api"

// New creates a new Gateway object associated with the specified router and
// following the given configuration.
func New(rootLogger *logrus.Logger, cfg *config.Gateway, r *Router) *Gateway {
func New(rootLogger *logrus.Logger, cfg *config.Gateway, r *Router) (*Gateway, error) {
logger := rootLogger.WithFields(logrus.Fields{"prefix": "gateway"})

cache, _ := lru.New(5000)
Expand All @@ -59,12 +63,55 @@ func New(rootLogger *logrus.Logger, cfg *config.Gateway, r *Router) *Gateway {
if err := gw.AddConfig(cfg); err != nil {
logger.Errorf("Failed to add configuration to gateway: %#v", err)
}
return gw

persistentMessageStorePath := gw.BridgeValues().General.PersistentMessageStorePath
usePersistent := persistentMessageStorePath != ""

if usePersistent {
rootPath := fmt.Sprintf("%s/%s", persistentMessageStorePath, gw.Name)
err := os.MkdirAll(rootPath, os.ModePerm)
if err != nil {
return nil, err
}

MessageStore, err := gw.getMessageMapStore(fmt.Sprintf("%s/Messages", rootPath))
if err != nil {
return nil, err
}
gw.MessageStore = MessageStore

CanonicalStore, err := gw.getMessageMapStore(fmt.Sprintf("%s/Canonical", rootPath))
if err != nil {
return nil, err
}
gw.CanonicalStore = CanonicalStore
}

return gw, nil
}

func (gw *Gateway) SetMessageMap(canonicalMsgID string, msgIDs []*BrMsgID) {
usePersistent := gw.BridgeValues().General.PersistentMessageStorePath != ""
if usePersistent {
gw.setDestMessagesToStore(canonicalMsgID, msgIDs)
} else {
gw.Messages.Add(canonicalMsgID, msgIDs)
}
}

// FindCanonicalMsgID returns the ID under which a message was stored in the cache.
func (gw *Gateway) FindCanonicalMsgID(protocol string, mID string) string {
ID := protocol + " " + mID

usePersistent := gw.BridgeValues().General.PersistentMessageStorePath != ""
if usePersistent {
return gw.getCanonicalMessageFromStore(ID)
} else {
return gw.getCanonicalMessageFromMemCache(ID)
}
}

func (gw *Gateway) getCanonicalMessageFromMemCache(ID string) string {
if gw.Messages.Contains(ID) {
return ID
}
Expand Down Expand Up @@ -259,13 +306,26 @@ func (gw *Gateway) getDestChannel(msg *config.Message, dest bridge.Bridge) []con
}

func (gw *Gateway) getDestMsgID(msgID string, dest *bridge.Bridge, channel *config.ChannelInfo) string {
var destID string

usePersistent := gw.BridgeValues().General.PersistentMessageStorePath != ""
if usePersistent {
destID = gw.getDestMessagesFromStore(msgID, dest, channel)
} else {
destID = gw.getDestMessageFromMemCache(msgID, dest, channel)
}

return strings.Replace(destID, dest.Protocol+" ", "", 1)
}

func (gw *Gateway) getDestMessageFromMemCache(msgID string, dest *bridge.Bridge, channel *config.ChannelInfo) string {
if res, ok := gw.Messages.Get(msgID); ok {
IDs := res.([]*BrMsgID)
for _, id := range IDs {
// check protocol, bridge name and channelname
// for people that reuse the same bridge multiple times. see #342
if dest.Protocol == id.br.Protocol && dest.Name == id.br.Name && channel.ID == id.ChannelID {
return strings.Replace(id.ID, dest.Protocol+" ", "", 1)
if dest.Protocol == id.Protocol && dest.Name == id.DestName && channel.ID == id.ChannelID {
return id.ID
}
}
}
Expand Down
8 changes: 7 additions & 1 deletion gateway/handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,13 @@ func (gw *Gateway) handleMessage(rmsg *config.Message, dest *bridge.Bridge) []*B
if msgID == "" {
continue
}
brMsgIDs = append(brMsgIDs, &BrMsgID{dest, dest.Protocol + " " + msgID, channel.ID})
brMsgIDs = append(brMsgIDs,
&BrMsgID{
Protocol: dest.Protocol,
DestName: dest.Name,
ChannelID: channel.ID,
ID: msgID,
})
}
return brMsgIDs
}
Expand Down
Loading