Skip to content

Commit

Permalink
Get rid of an old and terrible hack. Local stream sockets enqueue pac…
Browse files Browse the repository at this point in the history
…kets

directly on the peer's sockbuf, rather then the sender's sockbuf.  That
part of the code is fine, but in order to prevent the sender from queueing
infinite mbufs (because its sockbuf appears to be empty when you do that)
the code dynamically messed around with the sender's high water mark.

This blew up the new SOCK_SEQPACKET.  In particular, it blows up the
use of the PR_ATOMIC on stream sockets and can cause spurious EMSGSIZE
errors to be returned instead of the EWOULDBLOCK that should have been
returned.

Also fix, or partially the resource limit code which tries to reduce the
high water mark when a user is using too many mbufs.  This never worked
well and still doesn't, but it is in better shape now.

Get rid of the crufty code and simply add a flag to the signalsockbuf,
SSB_STOP, to stop the sender.

Also adjust the vkernel to increase the default socket buffer when
connecting to vknet instead of if_tap.  VKE currently issues non-blocking
writes to vknet/tap and we do not want to lose packets for no good reason.
  • Loading branch information
Matthew Dillon committed May 27, 2008
1 parent dbfd168 commit 3a6117b
Show file tree
Hide file tree
Showing 8 changed files with 81 additions and 52 deletions.
12 changes: 9 additions & 3 deletions sys/dev/virtual/net/if_vke.c
Expand Up @@ -31,7 +31,7 @@
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $DragonFly: src/sys/dev/virtual/net/if_vke.c,v 1.8 2008/05/27 01:10:38 dillon Exp $
* $DragonFly: src/sys/dev/virtual/net/if_vke.c,v 1.9 2008/05/27 05:25:33 dillon Exp $
*/

#include <sys/param.h>
Expand Down Expand Up @@ -156,11 +156,17 @@ vke_start(struct ifnet *ifp)
return;

while ((m = ifq_dequeue(&ifp->if_snd, NULL)) != NULL) {
/*
* Copy the data into a single mbuf and write it out
* non-blocking.
*/
if (m->m_pkthdr.len <= MCLBYTES) {
m_copydata(m, 0, m->m_pkthdr.len, sc->sc_txbuf);
BPF_MTAP(ifp, m);
write(sc->sc_fd, sc->sc_txbuf, m->m_pkthdr.len);
ifp->if_opackets++;
if (write(sc->sc_fd, sc->sc_txbuf, m->m_pkthdr.len) < 0)
ifp->if_oerrors++;
else
ifp->if_opackets++;
} else {
ifp->if_oerrors++;
}
Expand Down
19 changes: 14 additions & 5 deletions sys/kern/kern_resource.c
Expand Up @@ -37,7 +37,7 @@
*
* @(#)kern_resource.c 8.5 (Berkeley) 1/21/94
* $FreeBSD: src/sys/kern/kern_resource.c,v 1.55.2.5 2001/11/03 01:41:08 ps Exp $
* $DragonFly: src/sys/kern/kern_resource.c,v 1.34 2007/08/20 05:40:40 dillon Exp $
* $DragonFly: src/sys/kern/kern_resource.c,v 1.35 2008/05/27 05:25:34 dillon Exp $
*/

#include "opt_compat.h"
Expand Down Expand Up @@ -682,10 +682,19 @@ chgsbsize(struct uidinfo *uip, u_long *hiwat, u_long to, rlim_t max)

crit_enter();
new = uip->ui_sbsize + to - *hiwat;
/* don't allow them to exceed max, but allow subtraction */
if (to > *hiwat && new > max) {
crit_exit();
return (0);

/*
* If we are trying to increase the socket buffer size
* Scale down the hi water mark when we exceed the user's
* allowed socket buffer space.
*
* We can't scale down too much or we will blow up atomic packet
* operations.
*/
if (to > *hiwat && to > MCLBYTES && new > max) {
to = to * max / new;
if (to < MCLBYTES)
to = MCLBYTES;
}
uip->ui_sbsize = new;
*hiwat = to;
Expand Down
9 changes: 5 additions & 4 deletions sys/kern/uipc_socket.c
Expand Up @@ -65,7 +65,7 @@
*
* @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94
* $FreeBSD: src/sys/kern/uipc_socket.c,v 1.68.2.24 2003/11/11 17:18:18 silby Exp $
* $DragonFly: src/sys/kern/uipc_socket.c,v 1.47 2008/01/05 14:02:38 swildner Exp $
* $DragonFly: src/sys/kern/uipc_socket.c,v 1.48 2008/05/27 05:25:34 dillon Exp $
*/

#include "opt_inet.h"
Expand Down Expand Up @@ -554,12 +554,13 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
gotoerr(so->so_proto->pr_flags & PR_CONNREQUIRED ?
ENOTCONN : EDESTADDRREQ);
}
if ((atomic && resid > so->so_snd.ssb_hiwat) ||
clen > so->so_snd.ssb_hiwat) {
gotoerr(EMSGSIZE);
}
space = ssb_space(&so->so_snd);
if (flags & MSG_OOB)
space += 1024;
if ((atomic && resid > so->so_snd.ssb_hiwat) ||
clen > so->so_snd.ssb_hiwat)
gotoerr(EMSGSIZE);
if (space < resid + clen && uio &&
(atomic || space < so->so_snd.ssb_lowat || space < clen)) {
if (flags & (MSG_FNONBLOCKING|MSG_DONTWAIT))
Expand Down
4 changes: 2 additions & 2 deletions sys/kern/uipc_socket2.c
Expand Up @@ -33,7 +33,7 @@
*
* @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93
* $FreeBSD: src/sys/kern/uipc_socket2.c,v 1.55.2.17 2002/08/31 19:04:55 dwmalone Exp $
* $DragonFly: src/sys/kern/uipc_socket2.c,v 1.30 2008/04/20 13:44:25 swildner Exp $
* $DragonFly: src/sys/kern/uipc_socket2.c,v 1.31 2008/05/27 05:25:34 dillon Exp $
*/

#include "opt_param.h"
Expand Down Expand Up @@ -423,7 +423,7 @@ ssb_reserve(struct signalsockbuf *ssb, u_long cc, struct socket *so,
* or when called from netgraph (ie, ngd_attach)
*/
if (cc > sb_max_adj)
return (0);
cc = sb_max_adj;
if (!chgsbsize(so->so_cred->cr_uidinfo, &ssb->ssb_hiwat, cc,
rl ? rl->rlim_cur : RLIM_INFINITY)) {
return (0);
Expand Down
52 changes: 23 additions & 29 deletions sys/kern/uipc_usrreq.c
Expand Up @@ -32,7 +32,7 @@
*
* From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
* $FreeBSD: src/sys/kern/uipc_usrreq.c,v 1.54.2.10 2003/03/04 17:28:09 nectar Exp $
* $DragonFly: src/sys/kern/uipc_usrreq.c,v 1.40 2008/05/27 01:10:39 dillon Exp $
* $DragonFly: src/sys/kern/uipc_usrreq.c,v 1.41 2008/05/27 05:25:34 dillon Exp $
*/

#include <sys/param.h>
Expand Down Expand Up @@ -71,7 +71,7 @@ static struct unp_head unp_shead, unp_dhead;
* Unix communications domain.
*
* TODO:
* SEQPACKET, RDM
* RDM
* rethink name space problems
* need a proper out-of-band
* lock pushdown
Expand Down Expand Up @@ -233,7 +233,6 @@ uipc_rcvd(struct socket *so, int flags)
{
struct unpcb *unp = so->so_pcb;
struct socket *so2;
u_long newhiwat;

if (unp == NULL)
return EINVAL;
Expand All @@ -246,19 +245,18 @@ uipc_rcvd(struct socket *so, int flags)
case SOCK_SEQPACKET:
if (unp->unp_conn == NULL)
break;
so2 = unp->unp_conn->unp_socket;
/*
* Adjust backpressure on sender
* and wakeup any waiting to write.
* Because we are transfering mbufs directly to the
* peer socket we have to use SSB_STOP on the sender
* to prevent it from building up infinite mbufs.
*/
so2->so_snd.ssb_mbmax += unp->unp_mbcnt - so->so_rcv.ssb_mbcnt;
unp->unp_mbcnt = so->so_rcv.ssb_mbcnt;
newhiwat =
so2->so_snd.ssb_hiwat + unp->unp_cc - so->so_rcv.ssb_cc;
chgsbsize(so2->so_cred->cr_uidinfo, &so2->so_snd.ssb_hiwat,
newhiwat, RLIM_INFINITY);
unp->unp_cc = so->so_rcv.ssb_cc;
sowwakeup(so2);
so2 = unp->unp_conn->unp_socket;
if (so->so_rcv.ssb_cc < so2->so_snd.ssb_hiwat &&
so->so_rcv.ssb_mbcnt < so2->so_snd.ssb_mbmax
) {
so2->so_snd.ssb_flags &= ~SSB_STOP;
sowwakeup(so2);
}
break;

default:
Expand All @@ -276,7 +274,6 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
int error = 0;
struct unpcb *unp = so->so_pcb;
struct socket *so2;
u_long newhiwat;

if (unp == NULL) {
error = EINVAL;
Expand Down Expand Up @@ -368,14 +365,17 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
sbappend(&so2->so_rcv.sb, m);
m = NULL;
}
so->so_snd.ssb_mbmax -=
so2->so_rcv.ssb_mbcnt - unp->unp_conn->unp_mbcnt;
unp->unp_conn->unp_mbcnt = so2->so_rcv.ssb_mbcnt;
newhiwat = so->so_snd.ssb_hiwat -
(so2->so_rcv.ssb_cc - unp->unp_conn->unp_cc);
chgsbsize(so->so_cred->cr_uidinfo, &so->so_snd.ssb_hiwat,
newhiwat, RLIM_INFINITY);
unp->unp_conn->unp_cc = so2->so_rcv.ssb_cc;

/*
* Because we are transfering mbufs directly to the
* peer socket we have to use SSB_STOP on the sender
* to prevent it from building up infinite mbufs.
*/
if (so2->so_rcv.ssb_cc >= so->so_snd.ssb_hiwat ||
so2->so_rcv.ssb_mbcnt >= so->so_snd.ssb_mbmax
) {
so->so_snd.ssb_flags |= SSB_STOP;
}
sorwakeup(so2);
break;

Expand Down Expand Up @@ -406,16 +406,10 @@ static int
uipc_sense(struct socket *so, struct stat *sb)
{
struct unpcb *unp = so->so_pcb;
struct socket *so2;

if (unp == NULL)
return EINVAL;
sb->st_blksize = so->so_snd.ssb_hiwat;
if ((so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) &&
unp->unp_conn != NULL) {
so2 = unp->unp_conn->unp_socket;
sb->st_blksize += so2->so_rcv.ssb_cc;
}
sb->st_dev = NOUDEV;
if (unp->unp_ino == 0) /* make up a non-zero inode number */
unp->unp_ino = (++unp_ino == 0) ? ++unp_ino : unp_ino;
Expand Down
7 changes: 6 additions & 1 deletion sys/platform/vkernel/platform/init.c
Expand Up @@ -31,7 +31,7 @@
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $DragonFly: src/sys/platform/vkernel/platform/init.c,v 1.54 2008/05/27 01:10:45 dillon Exp $
* $DragonFly: src/sys/platform/vkernel/platform/init.c,v 1.55 2008/05/27 05:25:35 dillon Exp $
*/

#include <sys/types.h>
Expand Down Expand Up @@ -973,6 +973,8 @@ unix_connect(const char *path)
struct sockaddr_un sunx;
int len;
int net_fd;
int sndbuf = 262144;
struct stat st;

snprintf(sunx.sun_path, sizeof(sunx.sun_path), "%s", path);
len = offsetof(struct sockaddr_un, sun_path[strlen(sunx.sun_path)]);
Expand All @@ -987,6 +989,9 @@ unix_connect(const char *path)
close(net_fd);
return(-1);
}
setsockopt(net_fd, SOL_SOCKET, SO_SNDBUF, &sndbuf, sizeof(sndbuf));
if (fstat(net_fd, &st) == 0)
printf("Network socket buffer: %d bytes\n", st.st_blksize);
fcntl(net_fd, F_SETFL, O_NONBLOCK);
return(net_fd);
}
Expand Down
24 changes: 19 additions & 5 deletions sys/sys/socketvar.h
Expand Up @@ -32,7 +32,7 @@
*
* @(#)socketvar.h 8.3 (Berkeley) 2/19/95
* $FreeBSD: src/sys/sys/socketvar.h,v 1.46.2.10 2003/08/24 08:24:39 hsu Exp $
* $DragonFly: src/sys/sys/socketvar.h,v 1.30 2007/11/07 18:24:04 dillon Exp $
* $DragonFly: src/sys/sys/socketvar.h,v 1.31 2008/05/27 05:25:36 dillon Exp $
*/

#ifndef _SYS_SOCKETVAR_H_
Expand Down Expand Up @@ -83,6 +83,7 @@ struct signalsockbuf {
#define SSB_AIO 0x80 /* AIO operations queued */
#define SSB_KNOTE 0x100 /* kernel note attached */
#define SSB_MEVENT 0x200 /* need message event notification */
#define SSB_STOP 0x400 /* backpressure indicator */

/*
* Per-socket kernel structure. Contains universal send and receive queues,
Expand Down Expand Up @@ -220,11 +221,24 @@ struct xsocket {
* How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
* This is problematical if the fields are unsigned, as the space might
* still be negative (cc > hiwat or mbcnt > mbmax). Should detect
* overflow and return 0. Should use "lmin" but it doesn't exist now.
* overflow and return 0.
*
* SSB_STOP ignores cc/hiwat and returns 0. This is used by unix domain
* stream sockets to signal backpressure.
*/
#define ssb_space(ssb) \
((long)imin((int)((ssb)->ssb_hiwat - (ssb)->ssb_cc), \
(int)((ssb)->ssb_mbmax - (ssb)->ssb_mbcnt)))
static __inline
long
ssb_space(struct signalsockbuf *ssb)
{
long bleft;
long mleft;

if (ssb->ssb_flags & SSB_STOP)
return(0);
bleft = ssb->ssb_hiwat - ssb->ssb_cc;
mleft = ssb->ssb_mbmax - ssb->ssb_mbcnt;
return((bleft < mleft) ? bleft : mleft);
}

#define ssb_append(ssb, m) \
sbappend(&(ssb)->sb, m)
Expand Down
6 changes: 3 additions & 3 deletions sys/sys/unpcb.h
Expand Up @@ -32,7 +32,7 @@
*
* @(#)unpcb.h 8.1 (Berkeley) 6/2/93
* $FreeBSD: src/sys/sys/unpcb.h,v 1.9.2.1 2002/03/09 05:22:23 dd Exp $
* $DragonFly: src/sys/sys/unpcb.h,v 1.4 2005/04/20 19:38:22 hsu Exp $
* $DragonFly: src/sys/sys/unpcb.h,v 1.5 2008/05/27 05:25:36 dillon Exp $
*/

#ifndef _SYS_UNPCB_H_
Expand Down Expand Up @@ -79,8 +79,8 @@ struct unpcb {
struct unp_head unp_refs; /* referencing socket linked list */
LIST_ENTRY(unpcb) unp_reflink; /* link in unp_refs list */
struct sockaddr_un *unp_addr; /* bound address of socket */
int unp_cc; /* copy of rcv.sb_cc */
int unp_mbcnt; /* copy of rcv.sb_mbcnt */
int unused01;
int unused02;
unp_gen_t unp_gencnt; /* generation count of this instance */
int unp_flags; /* flags */
struct xucred unp_peercred; /* peer credentials, if applicable */
Expand Down

0 comments on commit 3a6117b

Please sign in to comment.