Skip to content

Commit 3a6117b

Browse files
author
Matthew Dillon
committed
Get rid of an old and terrible hack. Local stream sockets enqueue packets
directly on the peer's sockbuf, rather than the sender's sockbuf. That part of the code is fine, but in order to prevent the sender from queueing infinite mbufs (because its sockbuf appears to be empty when you do that) the code dynamically messed around with the sender's high water mark. This blew up the new SOCK_SEQPACKET. In particular, it blows up the use of PR_ATOMIC on stream sockets and can cause spurious EMSGSIZE errors to be returned instead of the EWOULDBLOCK that should have been returned. Also fix, or partially fix, the resource limit code which tries to reduce the high water mark when a user is using too many mbufs. This never worked well and still doesn't, but it is in better shape now. Get rid of the crufty code and simply add a flag to the signalsockbuf, SSB_STOP, to stop the sender. Also adjust the vkernel to increase the default socket buffer when connecting to vknet instead of if_tap. VKE currently issues non-blocking writes to vknet/tap and we do not want to lose packets for no good reason.
1 parent dbfd168 commit 3a6117b

File tree

8 files changed

+81
-52
lines changed

8 files changed

+81
-52
lines changed

sys/dev/virtual/net/if_vke.c

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3232
* SUCH DAMAGE.
3333
*
34-
* $DragonFly: src/sys/dev/virtual/net/if_vke.c,v 1.8 2008/05/27 01:10:38 dillon Exp $
34+
* $DragonFly: src/sys/dev/virtual/net/if_vke.c,v 1.9 2008/05/27 05:25:33 dillon Exp $
3535
*/
3636

3737
#include <sys/param.h>
@@ -156,11 +156,17 @@ vke_start(struct ifnet *ifp)
156156
return;
157157

158158
while ((m = ifq_dequeue(&ifp->if_snd, NULL)) != NULL) {
159+
/*
160+
* Copy the data into a single mbuf and write it out
161+
* non-blocking.
162+
*/
159163
if (m->m_pkthdr.len <= MCLBYTES) {
160164
m_copydata(m, 0, m->m_pkthdr.len, sc->sc_txbuf);
161165
BPF_MTAP(ifp, m);
162-
write(sc->sc_fd, sc->sc_txbuf, m->m_pkthdr.len);
163-
ifp->if_opackets++;
166+
if (write(sc->sc_fd, sc->sc_txbuf, m->m_pkthdr.len) < 0)
167+
ifp->if_oerrors++;
168+
else
169+
ifp->if_opackets++;
164170
} else {
165171
ifp->if_oerrors++;
166172
}

sys/kern/kern_resource.c

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
*
3838
* @(#)kern_resource.c 8.5 (Berkeley) 1/21/94
3939
* $FreeBSD: src/sys/kern/kern_resource.c,v 1.55.2.5 2001/11/03 01:41:08 ps Exp $
40-
* $DragonFly: src/sys/kern/kern_resource.c,v 1.34 2007/08/20 05:40:40 dillon Exp $
40+
* $DragonFly: src/sys/kern/kern_resource.c,v 1.35 2008/05/27 05:25:34 dillon Exp $
4141
*/
4242

4343
#include "opt_compat.h"
@@ -682,10 +682,19 @@ chgsbsize(struct uidinfo *uip, u_long *hiwat, u_long to, rlim_t max)
682682

683683
crit_enter();
684684
new = uip->ui_sbsize + to - *hiwat;
685-
/* don't allow them to exceed max, but allow subtraction */
686-
if (to > *hiwat && new > max) {
687-
crit_exit();
688-
return (0);
685+
686+
/*
687+
* If we are trying to increase the socket buffer size
688+
* Scale down the hi water mark when we exceed the user's
689+
* allowed socket buffer space.
690+
*
691+
* We can't scale down too much or we will blow up atomic packet
692+
* operations.
693+
*/
694+
if (to > *hiwat && to > MCLBYTES && new > max) {
695+
to = to * max / new;
696+
if (to < MCLBYTES)
697+
to = MCLBYTES;
689698
}
690699
uip->ui_sbsize = new;
691700
*hiwat = to;

sys/kern/uipc_socket.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@
6565
*
6666
* @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94
6767
* $FreeBSD: src/sys/kern/uipc_socket.c,v 1.68.2.24 2003/11/11 17:18:18 silby Exp $
68-
* $DragonFly: src/sys/kern/uipc_socket.c,v 1.47 2008/01/05 14:02:38 swildner Exp $
68+
* $DragonFly: src/sys/kern/uipc_socket.c,v 1.48 2008/05/27 05:25:34 dillon Exp $
6969
*/
7070

7171
#include "opt_inet.h"
@@ -554,12 +554,13 @@ sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
554554
gotoerr(so->so_proto->pr_flags & PR_CONNREQUIRED ?
555555
ENOTCONN : EDESTADDRREQ);
556556
}
557+
if ((atomic && resid > so->so_snd.ssb_hiwat) ||
558+
clen > so->so_snd.ssb_hiwat) {
559+
gotoerr(EMSGSIZE);
560+
}
557561
space = ssb_space(&so->so_snd);
558562
if (flags & MSG_OOB)
559563
space += 1024;
560-
if ((atomic && resid > so->so_snd.ssb_hiwat) ||
561-
clen > so->so_snd.ssb_hiwat)
562-
gotoerr(EMSGSIZE);
563564
if (space < resid + clen && uio &&
564565
(atomic || space < so->so_snd.ssb_lowat || space < clen)) {
565566
if (flags & (MSG_FNONBLOCKING|MSG_DONTWAIT))

sys/kern/uipc_socket2.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
*
3434
* @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93
3535
* $FreeBSD: src/sys/kern/uipc_socket2.c,v 1.55.2.17 2002/08/31 19:04:55 dwmalone Exp $
36-
* $DragonFly: src/sys/kern/uipc_socket2.c,v 1.30 2008/04/20 13:44:25 swildner Exp $
36+
* $DragonFly: src/sys/kern/uipc_socket2.c,v 1.31 2008/05/27 05:25:34 dillon Exp $
3737
*/
3838

3939
#include "opt_param.h"
@@ -423,7 +423,7 @@ ssb_reserve(struct signalsockbuf *ssb, u_long cc, struct socket *so,
423423
* or when called from netgraph (ie, ngd_attach)
424424
*/
425425
if (cc > sb_max_adj)
426-
return (0);
426+
cc = sb_max_adj;
427427
if (!chgsbsize(so->so_cred->cr_uidinfo, &ssb->ssb_hiwat, cc,
428428
rl ? rl->rlim_cur : RLIM_INFINITY)) {
429429
return (0);

sys/kern/uipc_usrreq.c

Lines changed: 23 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
*
3333
* From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
3434
* $FreeBSD: src/sys/kern/uipc_usrreq.c,v 1.54.2.10 2003/03/04 17:28:09 nectar Exp $
35-
* $DragonFly: src/sys/kern/uipc_usrreq.c,v 1.40 2008/05/27 01:10:39 dillon Exp $
35+
* $DragonFly: src/sys/kern/uipc_usrreq.c,v 1.41 2008/05/27 05:25:34 dillon Exp $
3636
*/
3737

3838
#include <sys/param.h>
@@ -71,7 +71,7 @@ static struct unp_head unp_shead, unp_dhead;
7171
* Unix communications domain.
7272
*
7373
* TODO:
74-
* SEQPACKET, RDM
74+
* RDM
7575
* rethink name space problems
7676
* need a proper out-of-band
7777
* lock pushdown
@@ -233,7 +233,6 @@ uipc_rcvd(struct socket *so, int flags)
233233
{
234234
struct unpcb *unp = so->so_pcb;
235235
struct socket *so2;
236-
u_long newhiwat;
237236

238237
if (unp == NULL)
239238
return EINVAL;
@@ -246,19 +245,18 @@ uipc_rcvd(struct socket *so, int flags)
246245
case SOCK_SEQPACKET:
247246
if (unp->unp_conn == NULL)
248247
break;
249-
so2 = unp->unp_conn->unp_socket;
250248
/*
251-
* Adjust backpressure on sender
252-
* and wakeup any waiting to write.
249+
* Because we are transferring mbufs directly to the
250+
* peer socket we have to use SSB_STOP on the sender
251+
* to prevent it from building up infinite mbufs.
253252
*/
254-
so2->so_snd.ssb_mbmax += unp->unp_mbcnt - so->so_rcv.ssb_mbcnt;
255-
unp->unp_mbcnt = so->so_rcv.ssb_mbcnt;
256-
newhiwat =
257-
so2->so_snd.ssb_hiwat + unp->unp_cc - so->so_rcv.ssb_cc;
258-
chgsbsize(so2->so_cred->cr_uidinfo, &so2->so_snd.ssb_hiwat,
259-
newhiwat, RLIM_INFINITY);
260-
unp->unp_cc = so->so_rcv.ssb_cc;
261-
sowwakeup(so2);
253+
so2 = unp->unp_conn->unp_socket;
254+
if (so->so_rcv.ssb_cc < so2->so_snd.ssb_hiwat &&
255+
so->so_rcv.ssb_mbcnt < so2->so_snd.ssb_mbmax
256+
) {
257+
so2->so_snd.ssb_flags &= ~SSB_STOP;
258+
sowwakeup(so2);
259+
}
262260
break;
263261

264262
default:
@@ -276,7 +274,6 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
276274
int error = 0;
277275
struct unpcb *unp = so->so_pcb;
278276
struct socket *so2;
279-
u_long newhiwat;
280277

281278
if (unp == NULL) {
282279
error = EINVAL;
@@ -368,14 +365,17 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
368365
sbappend(&so2->so_rcv.sb, m);
369366
m = NULL;
370367
}
371-
so->so_snd.ssb_mbmax -=
372-
so2->so_rcv.ssb_mbcnt - unp->unp_conn->unp_mbcnt;
373-
unp->unp_conn->unp_mbcnt = so2->so_rcv.ssb_mbcnt;
374-
newhiwat = so->so_snd.ssb_hiwat -
375-
(so2->so_rcv.ssb_cc - unp->unp_conn->unp_cc);
376-
chgsbsize(so->so_cred->cr_uidinfo, &so->so_snd.ssb_hiwat,
377-
newhiwat, RLIM_INFINITY);
378-
unp->unp_conn->unp_cc = so2->so_rcv.ssb_cc;
368+
369+
/*
370+
* Because we are transferring mbufs directly to the
371+
* peer socket we have to use SSB_STOP on the sender
372+
* to prevent it from building up infinite mbufs.
373+
*/
374+
if (so2->so_rcv.ssb_cc >= so->so_snd.ssb_hiwat ||
375+
so2->so_rcv.ssb_mbcnt >= so->so_snd.ssb_mbmax
376+
) {
377+
so->so_snd.ssb_flags |= SSB_STOP;
378+
}
379379
sorwakeup(so2);
380380
break;
381381

@@ -406,16 +406,10 @@ static int
406406
uipc_sense(struct socket *so, struct stat *sb)
407407
{
408408
struct unpcb *unp = so->so_pcb;
409-
struct socket *so2;
410409

411410
if (unp == NULL)
412411
return EINVAL;
413412
sb->st_blksize = so->so_snd.ssb_hiwat;
414-
if ((so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) &&
415-
unp->unp_conn != NULL) {
416-
so2 = unp->unp_conn->unp_socket;
417-
sb->st_blksize += so2->so_rcv.ssb_cc;
418-
}
419413
sb->st_dev = NOUDEV;
420414
if (unp->unp_ino == 0) /* make up a non-zero inode number */
421415
unp->unp_ino = (++unp_ino == 0) ? ++unp_ino : unp_ino;

sys/platform/vkernel/platform/init.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3232
* SUCH DAMAGE.
3333
*
34-
* $DragonFly: src/sys/platform/vkernel/platform/init.c,v 1.54 2008/05/27 01:10:45 dillon Exp $
34+
* $DragonFly: src/sys/platform/vkernel/platform/init.c,v 1.55 2008/05/27 05:25:35 dillon Exp $
3535
*/
3636

3737
#include <sys/types.h>
@@ -973,6 +973,8 @@ unix_connect(const char *path)
973973
struct sockaddr_un sunx;
974974
int len;
975975
int net_fd;
976+
int sndbuf = 262144;
977+
struct stat st;
976978

977979
snprintf(sunx.sun_path, sizeof(sunx.sun_path), "%s", path);
978980
len = offsetof(struct sockaddr_un, sun_path[strlen(sunx.sun_path)]);
@@ -987,6 +989,9 @@ unix_connect(const char *path)
987989
close(net_fd);
988990
return(-1);
989991
}
992+
setsockopt(net_fd, SOL_SOCKET, SO_SNDBUF, &sndbuf, sizeof(sndbuf));
993+
if (fstat(net_fd, &st) == 0)
994+
printf("Network socket buffer: %d bytes\n", st.st_blksize);
990995
fcntl(net_fd, F_SETFL, O_NONBLOCK);
991996
return(net_fd);
992997
}

sys/sys/socketvar.h

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
*
3333
* @(#)socketvar.h 8.3 (Berkeley) 2/19/95
3434
* $FreeBSD: src/sys/sys/socketvar.h,v 1.46.2.10 2003/08/24 08:24:39 hsu Exp $
35-
* $DragonFly: src/sys/sys/socketvar.h,v 1.30 2007/11/07 18:24:04 dillon Exp $
35+
* $DragonFly: src/sys/sys/socketvar.h,v 1.31 2008/05/27 05:25:36 dillon Exp $
3636
*/
3737

3838
#ifndef _SYS_SOCKETVAR_H_
@@ -83,6 +83,7 @@ struct signalsockbuf {
8383
#define SSB_AIO 0x80 /* AIO operations queued */
8484
#define SSB_KNOTE 0x100 /* kernel note attached */
8585
#define SSB_MEVENT 0x200 /* need message event notification */
86+
#define SSB_STOP 0x400 /* backpressure indicator */
8687

8788
/*
8889
* Per-socket kernel structure. Contains universal send and receive queues,
@@ -220,11 +221,24 @@ struct xsocket {
220221
* How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
221222
* This is problematical if the fields are unsigned, as the space might
222223
* still be negative (cc > hiwat or mbcnt > mbmax). Should detect
223-
* overflow and return 0. Should use "lmin" but it doesn't exist now.
224+
* overflow and return 0.
225+
*
226+
* SSB_STOP ignores cc/hiwat and returns 0. This is used by unix domain
227+
* stream sockets to signal backpressure.
224228
*/
225-
#define ssb_space(ssb) \
226-
((long)imin((int)((ssb)->ssb_hiwat - (ssb)->ssb_cc), \
227-
(int)((ssb)->ssb_mbmax - (ssb)->ssb_mbcnt)))
229+
static __inline
230+
long
231+
ssb_space(struct signalsockbuf *ssb)
232+
{
233+
long bleft;
234+
long mleft;
235+
236+
if (ssb->ssb_flags & SSB_STOP)
237+
return(0);
238+
bleft = ssb->ssb_hiwat - ssb->ssb_cc;
239+
mleft = ssb->ssb_mbmax - ssb->ssb_mbcnt;
240+
return((bleft < mleft) ? bleft : mleft);
241+
}
228242

229243
#define ssb_append(ssb, m) \
230244
sbappend(&(ssb)->sb, m)

sys/sys/unpcb.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
*
3333
* @(#)unpcb.h 8.1 (Berkeley) 6/2/93
3434
* $FreeBSD: src/sys/sys/unpcb.h,v 1.9.2.1 2002/03/09 05:22:23 dd Exp $
35-
* $DragonFly: src/sys/sys/unpcb.h,v 1.4 2005/04/20 19:38:22 hsu Exp $
35+
* $DragonFly: src/sys/sys/unpcb.h,v 1.5 2008/05/27 05:25:36 dillon Exp $
3636
*/
3737

3838
#ifndef _SYS_UNPCB_H_
@@ -79,8 +79,8 @@ struct unpcb {
7979
struct unp_head unp_refs; /* referencing socket linked list */
8080
LIST_ENTRY(unpcb) unp_reflink; /* link in unp_refs list */
8181
struct sockaddr_un *unp_addr; /* bound address of socket */
82-
int unp_cc; /* copy of rcv.sb_cc */
83-
int unp_mbcnt; /* copy of rcv.sb_mbcnt */
82+
int unused01;
83+
int unused02;
8484
unp_gen_t unp_gencnt; /* generation count of this instance */
8585
int unp_flags; /* flags */
8686
struct xucred unp_peercred; /* peer credentials, if applicable */

0 commit comments

Comments
 (0)