Skip to content

Commit

Permalink
freeswitch: Several improvements to socket management
Browse files Browse the repository at this point in the history
* add exponential backoff behavior when reconnecting to a FreeSWITCH
   which is down (retry intervals of 1/5/10/30/90 seconds, advancing to
   the next interval after every 20 consecutive failed attempts)

* add monitoring for the ESL command activity of sockets.  Specifically,
   the module now remembers the time of the last ESL command and will reap
   sockets which have no event subscriptions and have not run an ESL
   command in the last 86400 seconds.

* freeswitch_esl() will now return faster in case the connection is
   down (the lock might still be grabbed by the timer, reconnecting...),
   thus minimizing the risk of a service outage.

* improved handling for connect error cases (fd management)
  • Loading branch information
liviuchircu committed Feb 12, 2024
1 parent 01b5fdd commit 3e7231c
Show file tree
Hide file tree
Showing 5 changed files with 92 additions and 24 deletions.
7 changes: 6 additions & 1 deletion modules/freeswitch/fs_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@ struct _fs_evs {
esl_port_t port;

esl_handle_t *handle;
unsigned int last_conn_jiffy;
unsigned int conn_attempts; /* consecutive */

rw_lock_t *stats_lk;
fs_stats stats;
Expand All @@ -105,7 +107,7 @@ struct _fs_evs {

unsigned long esl_reply_id; /* positive ID/counter for each FS esl cmd */
struct list_head esl_replies;

unsigned int last_esl_jiffy;
struct list_head events; /* events we're successfully subscribed to */
rw_lock_t *lists_lk; /* protects the flags + above lists */

Expand All @@ -115,6 +117,9 @@ struct _fs_evs {
struct list_head esl_cmd_list; /* "fs_sockets_esl" - pending ESL cmds */
};

/* evaluates to non-zero only if the socket has an ESL handle, the handle is
 * marked as connected and its descriptor is not ESL_SOCK_INVALID; the handle
 * pointer is checked first, so a NULL handle is safe to pass through here */
#define fs_evs_connected(s) ((s)->handle && (s)->handle->connected \
&& (s)->handle->sock != ESL_SOCK_INVALID)

typedef fs_evs* (*get_evs_f) (const str *host, unsigned short port,
const str *user, const str *pass);
typedef fs_evs* (*get_evs_by_url_f) (const str *fs_url);
Expand Down
4 changes: 4 additions & 0 deletions modules/freeswitch/fs_ipc.c
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ unsigned long fs_ipc_send_esl_cmd(fs_evs *sock, const str *fs_cmd)
{
fs_ipc_esl_cmd *cmd;
unsigned long esl_reply_id;
unsigned int ticks;

cmd = shm_malloc(sizeof *cmd);
if (!cmd) {
Expand All @@ -93,8 +94,11 @@ unsigned long fs_ipc_send_esl_cmd(fs_evs *sock, const str *fs_cmd)
memset(cmd, 0, sizeof *cmd);

cmd->sock = sock;
ticks = get_ticks();

lock_start_write(sock->lists_lk);
/* we're only interested in usage at script level, not the actual ESL */
sock->last_esl_jiffy = ticks;
cmd->esl_reply_id = sock->esl_reply_id++;
lock_stop_write(sock->lists_lk);

Expand Down
96 changes: 74 additions & 22 deletions modules/freeswitch/fs_proc.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,19 +54,49 @@ extern void fs_api_set_proc_no(void);
extern void evs_free(fs_evs *sock);
extern struct fs_event *get_event(fs_evs *sock, const str *name);

/* delay between two consecutive reconnect attempts, one entry per level */
static int fs_reconnect_backoff_v[] = {1, 5, 10, 30, 90}; /* sec */
static int fs_reconnect_backoff_n = 20; /* bump the backoff every N failures */
#define fs_reconnect_backoff_v_sz \
	(sizeof fs_reconnect_backoff_v/sizeof fs_reconnect_backoff_v[0])

/* sockets with no event subscriptions and no ESL commands ran for N seconds */
int fs_no_esl_timeout = 86400; /* sec */

/*
 * Evaluates to non-zero if @sock is still worth holding on to:
 *   - it is referenced (ref > 0), or
 *   - at least one ESL command was run over it (esl_reply_id > 1) and:
 *       - while disconnected: it has not yet used up all of its
 *         (fs_reconnect_backoff_n * fs_reconnect_backoff_v_sz) attempts
 *       - while connected: its last ESL command is more recent than
 *         fs_no_esl_timeout seconds
 *
 * A socket whose handle was never allocated is treated as disconnected,
 * rather than having its NULL handle dereferenced.
 *
 * NOTE: @sock is evaluated multiple times, do not pass an expression with
 * side effects.
 */
#define SHOULD_KEEP_EVS(sock) ((sock)->ref > 0 || \
	/* has the script writer run at least 1 FS ESL command over it? */ \
	((sock)->esl_reply_id > 1 && \
		(!((sock)->handle && (sock)->handle->connected) ? \
			/* if yes, give it a reasonable # of reconnect attempts... */ \
			(sock)->conn_attempts < \
				(fs_reconnect_backoff_n * fs_reconnect_backoff_v_sz) : \
			/* ... and also a reasonable time with no ESL commands at all */ \
			(get_ticks() - (sock)->last_esl_jiffy) < fs_no_esl_timeout) \
	))

/**
 * Remove @fd from the reactor ahead of closing it.
 *
 * @fd:  the descriptor to remove
 * @idx: index handed to reactor_del_reader():
 *         > 0: removal is first attempted with this index, falling back
 *              to an index of -1 if that attempt fails
 *         < 0: removal is attempted directly with an index of -1
 *           0: nothing is removed
 *
 * Returns 0 on success or if there was nothing to do, otherwise the non-zero
 * return code of the last failed reactor_del_reader() attempt.
 */
static inline int fs_reactor_del_reader(int fd, int idx)
{
	int rc = 0;

	if (idx > 0) {
		rc = reactor_del_reader(fd, idx, IO_FD_CLOSING);
		if (rc == 0)
			return 0;

		LM_ERR("failed to delete sock %d, idx %d\n", fd, idx);
		idx = -1;
	}

	/* reached on both the fallback path and when called with idx < 0;
	 * the latter must not be skipped, or the fd stays in the reactor */
	if (idx < 0 && (rc = reactor_del_reader(fd, -1, IO_FD_CLOSING)) != 0)
		LM_DBG("failed to delete sock %d, idx -1\n", fd);

	return rc;
}

static void destroy_fs_evs(fs_evs *sock, int idx)
{
esl_status_t rc;

LM_DBG("destroying sock %s:%d\n", sock->host.s, sock->port);

if (idx > 0 && reactor_del_reader(sock->handle->sock, idx, IO_FD_CLOSING) != 0) {
LM_ERR("failed to delete sock %d, idx %d\n", sock->handle->sock, idx);
idx = -1;
}

if (idx < 0 && reactor_del_reader(sock->handle->sock, -1, IO_FD_CLOSING) != 0)
LM_DBG("failed to delete sock %d, idx -1\n", sock->handle->sock);
fs_reactor_del_reader(sock->handle->sock, idx);

rc = esl_disconnect(sock->handle);
if (rc != ESL_SUCCESS)
Expand Down Expand Up @@ -178,13 +208,16 @@ void prepare_reconnect(fs_evs *sock)
ev->action = FS_EVENT_SUB;
}

evs_reset_flags(sock, FS_EVS_FL_CONNECTED);

lock_start_write(sockets_esl_lock);
if (list_empty(&sock->esl_cmd_list))
list_add_tail(&sock->esl_cmd_list, fs_sockets_esl);
lock_stop_write(sockets_esl_lock);

lock_start_write(sockets_down_lock);
list_add_tail(&sock->reconnect_list, fs_sockets_down);
if (list_empty(&sock->reconnect_list))
list_add_tail(&sock->reconnect_list, fs_sockets_down);
lock_stop_write(sockets_down_lock);
}

Expand All @@ -205,9 +238,9 @@ inline static int handle_io(struct fd_map *fm, int idx, int event_type)
LM_DBG("FS data available on sock %s:%d, ref: %d\n",
sock->host.s, sock->port, sock->ref);

/* ignore the event: nobody's using this socket anymore, close it */
lock_start_write(sockets_lock);
if (sock->ref == 0) {
/* ignore the event, the socket is unused */
destroy_fs_evs(sock, idx);
lock_stop_write(sockets_lock);
return 0;
Expand All @@ -221,18 +254,13 @@ inline static int handle_io(struct fd_map *fm, int idx, int event_type)
LM_ERR("read error %d on FS sock %.*s:%d. Reconnecting...\n",
rc, sock->host.len, sock->host.s, sock->port);

if (reactor_del_reader(sock->handle->sock, idx,
IO_FD_CLOSING) != 0) {
LM_ERR("del failed for sock %d\n", sock->handle->sock);
return 0;
}
fs_reactor_del_reader(sock->handle->sock, idx);

rc = esl_disconnect(sock->handle);
if (rc != ESL_SUCCESS) {
LM_ERR("disconnect error %d on FS sock %.*s:%d\n",
rc, sock->host.len, sock->host.s, sock->port);
sock->handle->connected = 0;
return 0;
}

prepare_reconnect(sock);
Expand Down Expand Up @@ -404,6 +432,9 @@ int update_event_subscriptions(fs_evs *sock)
/* handle any pending event actions */
list_for_each_safe(_, __, &sock->events) {
event = list_entry(_, struct fs_event, list);
LM_DBG("process event %s, refsum: %d, action: %d\n", event->name.s,
event->refsum, event->action);

if (event->refsum > 0 && event->action == FS_EVENT_SUB) {
LM_DBG("subscribing to %s events on %s:%d\n",
event->name.s, sock->host.s, sock->port);
Expand Down Expand Up @@ -458,32 +489,47 @@ int update_event_subscriptions(fs_evs *sock)
return ret;
}

/* referenced by 1+ modules or has performed at least one ESL command */
#define SHOULD_KEEP_EVS(sock) ((sock)->ref > 0 || (sock)->esl_reply_id > 1)

void handle_reconnects(void)
{
struct list_head *_, *__;
fs_evs *sock;
int first = 0, backoff = 1, i = -1;
unsigned int now;

list_for_each_safe(_, __, fs_sockets_down) {
sock = list_entry(_, fs_evs, reconnect_list);

LM_DBG("reconnecting sock %s:%d\n", sock->host.s, sock->port);
i = sock->conn_attempts / fs_reconnect_backoff_n;
if (i >= fs_reconnect_backoff_v_sz)
i = fs_reconnect_backoff_v_sz - 1;
backoff = fs_reconnect_backoff_v[i];

now = get_ticks();
LM_DBG("reconnect check for sock %s:%d, backoff: %d/%d, attempts: %u, "
"last-attempt-ticks: %u/%u\n", sock->host.s, sock->port,
backoff, i, sock->conn_attempts, sock->last_conn_jiffy, now);

if (sock->last_conn_jiffy && (now - sock->last_conn_jiffy < backoff)) {
LM_DBG(" (delaying reconnect due to backoff)\n");
continue;
}

if (sock->handle) {
if (sock->handle->connected && sock->handle->sock != ESL_SOCK_INVALID) {
if (fs_evs_connected(sock)) {
if (!SHOULD_KEEP_EVS(sock)) {
destroy_fs_evs(sock, -1);
continue;
}

LM_DBG("outdated reconnect on %s:%d, skipping\n", sock->host.s, sock->port);
sock->conn_attempts = 0;
evs_set_flags(sock, FS_EVS_FL_CONNECTED);
list_del(&sock->reconnect_list);
INIT_LIST_HEAD(&sock->reconnect_list);
continue;
}
} else {
first = 1;
sock->handle = pkg_malloc(sizeof *sock->handle);
if (!sock->handle) {
LM_ERR("failed to create FS handle!\n");
Expand All @@ -492,7 +538,11 @@ void handle_reconnects(void)
memset(sock->handle, 0, sizeof *sock->handle);
}

LM_DBG("reconnecting to FS sock '%s:%d'\n", sock->host.s, sock->port);
sock->conn_attempts++;
sock->last_conn_jiffy = now;
evs_reset_flags(sock, FS_EVS_FL_CONNECTED);
LM_DBG("%sconnecting to FS sock '%s:%d'\n", first ? "":"re-",
sock->host.s, sock->port);

if (esl_connect_timeout(sock->handle, sock->host.s, sock->port,
sock->user.s, sock->pass.s, fs_connect_timeout) != ESL_SUCCESS) {
Expand All @@ -517,6 +567,8 @@ void handle_reconnects(void)
continue;
}

sock->conn_attempts = 0;
evs_set_flags(sock, FS_EVS_FL_CONNECTED);
list_del(&sock->reconnect_list);
INIT_LIST_HEAD(&sock->reconnect_list);
}
Expand All @@ -541,7 +593,7 @@ static void apply_socket_commands(void)
sock = list_entry(_, fs_evs, esl_cmd_list);

/* above connect may have failed for this socket; skip it for now */
if (SHOULD_KEEP_EVS(sock) && !list_empty(&sock->reconnect_list))
if (!SHOULD_KEEP_EVS(sock) || !list_empty(&sock->reconnect_list))
continue;

rc = update_event_subscriptions(sock);
Expand Down
1 change: 1 addition & 0 deletions modules/freeswitch_scripting/fss_evs.c
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,7 @@ int del_from_fss_sockets(fs_evs *sock, const str *event_name)
}
}

LM_DBG("event not found\n");
return -1;
}

Expand Down
8 changes: 7 additions & 1 deletion modules/freeswitch_scripting/fss_mod.c
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ static int fs_esl(struct sip_msg *msg, str *cmd, str *url,
{
fs_evs *sock;
pv_value_t reply_val;
str reply;
str reply = STR_NULL;
int ret = 1;

sock = fs_api.get_evs_by_url(url);
Expand All @@ -209,6 +209,12 @@ static int fs_esl(struct sip_msg *msg, str *cmd, str *url,
return -1;
}

if (!(sock->flags & FS_EVS_FL_CONNECTED)) {
LM_ERR("command failed (FS not connected: %.*s)\n", url->len, url->s);
ret = -1;
goto out;
}

LM_DBG("running '%.*s' on %s:%d\n", cmd->len, cmd->s,
sock->host.s, sock->port);

Expand Down

0 comments on commit 3e7231c

Please sign in to comment.