diff --git a/ovn/controller/pinctrl.c b/ovn/controller/pinctrl.c index a20f6b6efb8..e3ee516e49b 100644 --- a/ovn/controller/pinctrl.c +++ b/ovn/controller/pinctrl.c @@ -61,6 +61,9 @@ static struct rconn *swconn; * rconn_get_connection_seqno(rconn), 'swconn' has reconnected. */ static unsigned int conn_seq_no; +static void init_buffered_packets_map(void); +static void destroy_buffered_packets_map(void); + static void pinctrl_handle_put_mac_binding(const struct flow *md, const struct flow *headers, bool is_arp); @@ -96,6 +99,7 @@ static void pinctrl_handle_put_nd_ra_opts( struct ofputil_packet_in *pin, struct ofpbuf *userdata, struct ofpbuf *continuation); static void pinctrl_handle_nd_ns(const struct flow *ip_flow, + struct dp_packet *pkt_in, const struct match *md, struct ofpbuf *userdata); static void init_ipv6_ras(void); @@ -108,6 +112,7 @@ static void send_ipv6_ras( ; COVERAGE_DEFINE(pinctrl_drop_put_mac_binding); +COVERAGE_DEFINE(pinctrl_drop_buffered_packets_map); void pinctrl_init(void) @@ -117,6 +122,7 @@ pinctrl_init(void) init_put_mac_bindings(); init_send_garps(); init_ipv6_ras(); + init_buffered_packets_map(); } static ovs_be32 @@ -190,9 +196,180 @@ set_actions_and_enqueue_msg(const struct dp_packet *packet, ofpbuf_uninit(&ofpacts); } +struct buffer_info { + struct ofpbuf ofpacts; + struct dp_packet *p; +}; + +#define BUFFER_QUEUE_DEPTH 4 +struct buffered_packets { + struct hmap_node hmap_node; + + /* key */ + struct in6_addr ip; + + long long int timestamp; + + struct buffer_info data[BUFFER_QUEUE_DEPTH]; + uint32_t head, tail; +}; + +static struct hmap buffered_packets_map; + +static void +init_buffered_packets_map(void) +{ + hmap_init(&buffered_packets_map); +} + +static void +destroy_buffered_packets(struct buffered_packets *bp) +{ + struct buffer_info *bi; + + while (bp->head != bp->tail) { + bi = &bp->data[bp->head]; + dp_packet_uninit(bi->p); + ofpbuf_uninit(&bi->ofpacts); + + bp->head = (bp->head + 1) % BUFFER_QUEUE_DEPTH; + } + hmap_remove(&buffered_packets_map, &bp->hmap_node); + free(bp); +} + +static void +destroy_buffered_packets_map(void) +{ + struct buffered_packets *bp; + HMAP_FOR_EACH_POP (bp, hmap_node, &buffered_packets_map) { + destroy_buffered_packets(bp); + } + hmap_destroy(&buffered_packets_map); +} + +static void +buffered_push_packet(struct buffered_packets *bp, + struct dp_packet *packet, + const struct match *md) +{ + uint32_t next = (bp->tail + 1) % BUFFER_QUEUE_DEPTH; + struct buffer_info *bi = &bp->data[bp->tail]; + + ofpbuf_init(&bi->ofpacts, 4096); + + reload_metadata(&bi->ofpacts, md); + struct ofpact_resubmit *resubmit = ofpact_put_RESUBMIT(&bi->ofpacts); + resubmit->in_port = OFPP_CONTROLLER; + resubmit->table_id = OFTABLE_REMOTE_OUTPUT; + + bi->p = packet; + + if (next == bp->head) { + bi = &bp->data[bp->head]; + dp_packet_uninit(bi->p); + ofpbuf_uninit(&bi->ofpacts); + bp->head = (bp->head + 1) % BUFFER_QUEUE_DEPTH; + } + bp->tail = next; +} + +static void +buffered_send_packets(struct buffered_packets *bp, struct eth_addr *addr) +{ + enum ofp_version version = rconn_get_version(swconn); + enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version); + + while (bp->head != bp->tail) { + struct buffer_info *bi = &bp->data[bp->head]; + struct eth_header *eth = dp_packet_data(bi->p); + + eth->eth_dst = *addr; + struct ofputil_packet_out po = { + .packet = dp_packet_data(bi->p), + .packet_len = dp_packet_size(bi->p), + .buffer_id = UINT32_MAX, + .ofpacts = bi->ofpacts.data, + .ofpacts_len = bi->ofpacts.size, + }; + match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER); + queue_msg(ofputil_encode_packet_out(&po, proto)); + + ofpbuf_uninit(&bi->ofpacts); + dp_packet_uninit(bi->p); + + bp->head = (bp->head + 1) % BUFFER_QUEUE_DEPTH; + } +} + +#define BUFFER_MAP_TIMEOUT 10000 static void -pinctrl_handle_arp(const struct flow *ip_flow, const struct match *md, - struct ofpbuf *userdata) +buffered_packets_map_gc(void) +{ + struct buffered_packets *cur_qp, *next_qp; + long long int now = time_msec(); + + HMAP_FOR_EACH_SAFE (cur_qp, next_qp, hmap_node, &buffered_packets_map) { + if (now > cur_qp->timestamp + BUFFER_MAP_TIMEOUT) { + destroy_buffered_packets(cur_qp); + } + } +} + +static struct buffered_packets * +pinctrl_find_buffered_packets(const struct in6_addr *ip, uint32_t hash) +{ + struct buffered_packets *qp; + + HMAP_FOR_EACH_WITH_HASH (qp, hmap_node, hash, + &buffered_packets_map) { + if (IN6_ARE_ADDR_EQUAL(&qp->ip, ip)) { + return qp; + } + } + return NULL; +} + +static int +pinctrl_handle_buffered_packets(const struct flow *ip_flow, + struct dp_packet *pkt_in, + const struct match *md, bool is_arp) +{ + struct buffered_packets *bp; + struct dp_packet *clone; + struct in6_addr addr; + + if (is_arp) { + addr = in6_addr_mapped_ipv4(ip_flow->nw_dst); + } else { + addr = ip_flow->ipv6_dst; + } + + uint32_t hash = hash_bytes(&addr, sizeof addr, 0); + bp = pinctrl_find_buffered_packets(&addr, hash); + if (!bp) { + if (hmap_count(&buffered_packets_map) >= 1000) { + COVERAGE_INC(pinctrl_drop_buffered_packets_map); + return -ENOMEM; + } + + bp = xmalloc(sizeof *bp); + hmap_insert(&buffered_packets_map, &bp->hmap_node, hash); + bp->head = bp->tail = 0; + bp->ip = addr; + } + bp->timestamp = time_msec(); + /* clone the packet to send it later with correct L2 address */ + clone = dp_packet_clone_data(dp_packet_data(pkt_in), + dp_packet_size(pkt_in)); + buffered_push_packet(bp, clone, md); + + return 0; +} + +static void +pinctrl_handle_arp(const struct flow *ip_flow, struct dp_packet *pkt_in, + const struct match *md, struct ofpbuf *userdata) { /* This action only works for IP packets, and the switch should only send * us IP packets this way, but check here just to be sure. */ @@ -203,6 +380,8 @@ pinctrl_handle_arp(const struct flow *ip_flow, const struct match *md, return; } + pinctrl_handle_buffered_packets(ip_flow, pkt_in, md, true); + /* Compose an ARP packet. */ uint64_t packet_stub[128 / 8]; struct dp_packet packet; @@ -1162,7 +1341,7 @@ process_packet_in(const struct ofp_header *msg, switch (ntohl(ah->opcode)) { case ACTION_OPCODE_ARP: - pinctrl_handle_arp(&headers, &pin.flow_metadata, &userdata); + pinctrl_handle_arp(&headers, &packet, &pin.flow_metadata, &userdata); break; case ACTION_OPCODE_PUT_ARP: @@ -1207,7 +1386,8 @@ process_packet_in(const struct ofp_header *msg, break; case ACTION_OPCODE_ND_NS: - pinctrl_handle_nd_ns(&headers, &pin.flow_metadata, &userdata); + pinctrl_handle_nd_ns(&headers, &packet, &pin.flow_metadata, + &userdata); break; case ACTION_OPCODE_ICMP: @@ -1310,6 +1490,7 @@ pinctrl_run(struct ovsdb_idl_txn *ovnsb_idl_txn, local_datapaths, active_tunnels); send_ipv6_ras(sbrec_port_binding_by_datapath, sbrec_port_binding_by_name, local_datapaths); + buffered_packets_map_gc(); } /* Table of ipv6_ra_state structures, keyed on logical port name */ @@ -1620,6 +1801,7 @@ pinctrl_destroy(void) destroy_put_mac_bindings(); destroy_send_garps(); destroy_ipv6_ras(); + destroy_buffered_packets_map(); } /* Implementation of the "put_arp" and "put_nd" OVN actions. These @@ -1642,7 +1824,7 @@ struct put_mac_binding { /* Key. */ uint32_t dp_key; uint32_t port_key; - char ip_s[INET6_ADDRSTRLEN + 1]; + struct in6_addr ip_key; /* Value. */ struct eth_addr mac; @@ -1666,13 +1848,13 @@ destroy_put_mac_bindings(void) static struct put_mac_binding * pinctrl_find_put_mac_binding(uint32_t dp_key, uint32_t port_key, - const char *ip_s, uint32_t hash) + const struct in6_addr *ip_key, uint32_t hash) { struct put_mac_binding *pa; HMAP_FOR_EACH_WITH_HASH (pa, hmap_node, hash, &put_mac_bindings) { if (pa->dp_key == dp_key && pa->port_key == port_key - && !strcmp(pa->ip_s, ip_s)) { + && IN6_ARE_ADDR_EQUAL(&pa->ip_key, ip_key)) { return pa; } } @@ -1685,18 +1867,19 @@ pinctrl_handle_put_mac_binding(const struct flow *md, { uint32_t dp_key = ntohll(md->metadata); uint32_t port_key = md->regs[MFF_LOG_INPORT - MFF_REG0]; - char ip_s[INET6_ADDRSTRLEN]; + struct buffered_packets *bp; + struct in6_addr ip_key; if (is_arp) { - ovs_be32 ip = htonl(md->regs[0]); - inet_ntop(AF_INET, &ip, ip_s, sizeof(ip_s)); + ip_key = in6_addr_mapped_ipv4(htonl(md->regs[0])); } else { ovs_be128 ip6 = hton128(flow_get_xxreg(md, 0)); - inet_ntop(AF_INET6, &ip6, ip_s, sizeof(ip_s)); + memcpy(&ip_key, &ip6, sizeof ip_key); } - uint32_t hash = hash_string(ip_s, hash_2words(dp_key, port_key)); + uint32_t hash = hash_bytes(&ip_key, sizeof ip_key, + hash_2words(dp_key, port_key)); struct put_mac_binding *pmb - = pinctrl_find_put_mac_binding(dp_key, port_key, ip_s, hash); + = pinctrl_find_put_mac_binding(dp_key, port_key, &ip_key, hash); if (!pmb) { if (hmap_count(&put_mac_bindings) >= 1000) { COVERAGE_INC(pinctrl_drop_put_mac_binding); @@ -1707,10 +1890,17 @@ pinctrl_handle_put_mac_binding(const struct flow *md, hmap_insert(&put_mac_bindings, &pmb->hmap_node, hash); pmb->dp_key = dp_key; pmb->port_key = port_key; - ovs_strlcpy_arrays(pmb->ip_s, ip_s); + pmb->ip_key = ip_key; } pmb->timestamp = time_msec(); pmb->mac = headers->dl_src; + + /* send queued pkts */ + uint32_t bhash = hash_bytes(&ip_key, sizeof ip_key, 0); + bp = pinctrl_find_buffered_packets(&ip_key, bhash); + if (bp) { + buffered_send_packets(bp, &pmb->mac); + } } static const struct sbrec_mac_binding * @@ -1760,25 +1950,23 @@ run_put_mac_binding(struct ovsdb_idl_txn *ovnsb_idl_txn, snprintf(mac_string, sizeof mac_string, ETH_ADDR_FMT, ETH_ADDR_ARGS(pmb->mac)); - /* Check for and update an existing IP-MAC binding for this logical - * port. - */ + struct ds ip_s = DS_EMPTY_INITIALIZER; + ipv6_format_mapped(&pmb->ip_key, &ip_s); + + /* Update or add an IP-MAC binding for this logical port. */ const struct sbrec_mac_binding *b = mac_binding_lookup(sbrec_mac_binding_by_lport_ip, pb->logical_port, - pmb->ip_s); - if (b) { - if (strcmp(b->mac, mac_string)) { - sbrec_mac_binding_set_mac(b, mac_string); - } - return; - } - - /* Add new IP-MAC binding for this logical port. */ - b = sbrec_mac_binding_insert(ovnsb_idl_txn); - sbrec_mac_binding_set_logical_port(b, pb->logical_port); - sbrec_mac_binding_set_ip(b, pmb->ip_s); - sbrec_mac_binding_set_mac(b, mac_string); - sbrec_mac_binding_set_datapath(b, pb->datapath); + ds_cstr(&ip_s)); + if (!b) { + b = sbrec_mac_binding_insert(ovnsb_idl_txn); + sbrec_mac_binding_set_logical_port(b, pb->logical_port); + sbrec_mac_binding_set_ip(b, ds_cstr(&ip_s)); + sbrec_mac_binding_set_mac(b, mac_string); + sbrec_mac_binding_set_datapath(b, pb->datapath); + } else if (strcmp(b->mac, mac_string)) { + sbrec_mac_binding_set_mac(b, mac_string); + } + ds_destroy(&ip_s); } static void @@ -2409,8 +2597,8 @@ pinctrl_handle_nd_na(const struct flow *ip_flow, const struct match *md, } static void -pinctrl_handle_nd_ns(const struct flow *ip_flow, const struct match *md, - struct ofpbuf *userdata) +pinctrl_handle_nd_ns(const struct flow *ip_flow, struct dp_packet *pkt_in, + const struct match *md, struct ofpbuf *userdata) { /* This action only works for IPv6 packets. */ if (get_dl_type(ip_flow) != htons(ETH_TYPE_IPV6)) { @@ -2419,6 +2607,8 @@ pinctrl_handle_nd_ns(const struct flow *ip_flow, const struct match *md, return; } + pinctrl_handle_buffered_packets(ip_flow, pkt_in, md, false); + uint64_t packet_stub[128 / 8]; struct dp_packet packet; dp_packet_use_stub(&packet, packet_stub, sizeof packet_stub); diff --git a/tests/ovn.at b/tests/ovn.at index 86d339f926a..002ef0dfb29 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -2623,6 +2623,11 @@ for is in 1 2 3; do echo $arp >> $id$jd2$kd.expected done done + if test $(vif_to_hv ${is}${js}${ks}) = $(vif_to_hv ${id}${jd}1); then + hmac=8000000000$o4 + rmac=00000000ff$id$jd + echo ${hmac}${rmac}08004500001c00000000"3f1101"00${sip}${dip}0035111100080000 >> ${id}11.expected + fi done done done @@ -2722,10 +2727,6 @@ for i in 1 2 3; do done done -# Allow some time for packet forwarding. -# XXX This can be improved. -sleep 1 - # 8. Generate an ARP reply for each of the IP addresses ARPed for # earlier as #3. # @@ -8912,8 +8913,6 @@ packet=${dst_mac}${src_mac}08004500001c0000000040110000${src_ip}${dst_ip}0035111 as hv1 ovs-appctl netdev-dummy/receive hv1-vif1 $packet OVS_WAIT_UNTIL([test `ovn-sbctl find mac_binding ip="10.0.0.2" | wc -l` -gt 0]) ovn-nbctl --wait=hv sync -# Send the second packet to reach the destination. -as hv1 ovs-appctl netdev-dummy/receive hv1-vif1 $packet # Packet to Expect at 'alice1' src_mac="000000010204" @@ -11322,3 +11321,149 @@ AT_CHECK([ovn-nbctl lsp-set-addresses sw2-p1 "00:00:00:00:00:04 192.168.0.3"]) AT_CHECK([ovn-nbctl lsp-set-addresses sw2-p1 "00:00:00:00:00:04 aef0::1"]) AT_CLEANUP + +AT_SETUP([ovn -- IP packet buffering]) +AT_KEYWORDS([ip-buffering]) +AT_SKIP_IF([test $HAVE_PYTHON = no]) +ovn_start + +# Logical network: +# One LR lr0 that has switches sw0 (192.168.1.0/24) and +# sw1 (172.16.1.0/24) connected to it. +# +# Physical network: +# Tw0 hypervisors hv[12]. +# hv1 hosts vif sw0-p0. +# hv1 hosts vif sw1-p0. + +send_icmp_packet() { + local inport=$1 hv=$2 eth_src=$3 eth_dst=$4 ipv4_src=$5 ipv4_dst=$6 ip_chksum=$7 data=$8 + shift 8 + + local ip_ttl=ff + local ip_len=001c + local packet=${eth_dst}${eth_src}08004500${ip_len}00004000${ip_ttl}01${ip_chksum}${ipv4_src}${ipv4_dst}${data} + as hv$hv ovs-appctl netdev-dummy/receive hv$hv-vif$inport $packet +} + +send_icmp6_packet() { + local inport=$1 hv=$2 eth_src=$3 eth_dst=$4 ipv6_src=$5 ipv6_dst=$6 ipv6_router=$7 exp_icmp_chksum=$8 + shift 8 + + local ip6_hdr=6000000000083aff${ipv6_src}${ipv6_dst} + local packet=${eth_dst}${eth_src}86dd${ip6_hdr}8000dcb662f00001 + + as hv$hv ovs-appctl netdev-dummy/receive hv$hv-vif$inport $packet +} + +get_arp_req() { + local eth_src=$1 spa=$2 tpa=$3 + local request=ffffffffffff${eth_src}08060001080006040001${eth_src}${spa}000000000000${tpa} + echo $request +} + +send_arp_reply() { + local hv=$1 inport=$2 eth_src=$3 eth_dst=$4 spa=$5 tpa=$6 + local request=${eth_dst}${eth_src}08060001080006040002${eth_src}${spa}${eth_dst}${tpa} + as hv$hv ovs-appctl netdev-dummy/receive hv${hv}-vif$inport $request +} + +send_na() { + local hv=$1 inport=$2 eth_src=$3 eth_dst=$4 src_ip=$5 dst_ip=$6 + local ip6_hdr=6000000000203aff${src_ip}${dst_ip} + local request=${eth_dst}${eth_src}86dd${ip6_hdr}8800d78440000000${src_ip}0201${eth_src} + + as hv$hv ovs-appctl netdev-dummy/receive hv${hv}-vif$inport $request +} + +get_nd() { + local eth_src=$1 src_ip=$2 dst_ip=$3 ta=$4 + local ip6_hdr=6000000000203aff${src_ip}${dst_ip} + request=3333ff000010${eth_src}86dd${ip6_hdr}8700357600000000${ta}0101${eth_src} + + echo $request +} + +net_add n1 + +sim_add hv1 +as hv1 +ovs-vsctl add-br br-phys +ovn_attach n1 br-phys 192.168.0.1 +ovs-vsctl -- add-port br-int hv1-vif1 -- \ + set interface hv1-vif1 external-ids:iface-id=sw0-p0 \ + options:tx_pcap=hv1/vif1-tx.pcap \ + options:rxq_pcap=hv1/vif1-rx.pcap \ + ofport-request=1 + +sim_add hv2 +as hv2 +ovs-vsctl add-br br-phys +ovn_attach n1 br-phys 192.168.0.2 +ovs-vsctl -- add-port br-int hv2-vif1 -- \ + set interface hv2-vif1 external-ids:iface-id=sw1-p0 \ + options:tx_pcap=hv2/vif1-tx.pcap \ + options:rxq_pcap=hv2/vif1-rx.pcap \ + ofport-request=1 + +ovn-nbctl create Logical_Router name=lr0 options:chassis=hv1 +ovn-nbctl ls-add sw0 +ovn-nbctl ls-add sw1 + +ovn-nbctl lrp-add lr0 sw0 00:00:01:01:02:03 192.168.1.1/24 2001::1/64 +ovn-nbctl lsp-add sw0 rp-sw0 -- set Logical_Switch_Port rp-sw0 \ + type=router options:router-port=sw0 \ + -- lsp-set-addresses rp-sw0 router + +ovn-nbctl lrp-add lr0 sw1 00:00:02:01:02:03 172.16.1.1/24 2002::1/64 +ovn-nbctl lsp-add sw1 rp-sw1 -- set Logical_Switch_Port rp-sw1 \ + type=router options:router-port=sw1 \ + -- lsp-set-addresses rp-sw1 router + +ovn-nbctl lsp-add sw0 sw0-p0 \ + -- lsp-set-addresses sw0-p0 "f0:00:00:01:02:03 192.168.1.2 2001::2" + +ovn-nbctl lsp-add sw1 sw1-p0 \ + -- lsp-set-addresses sw1-p0 unknown + +OVN_POPULATE_ARP +ovn-nbctl --wait=hv sync + +ip_to_hex() { + printf "%02x%02x%02x%02x" "$@" +} + +src_mac=f00000010203 +src_ip=$(ip_to_hex 192 168 1 2) +src_ip6=20010000000000000000000000000002 + +router_mac0=000001010203 +router_mac1=000002010203 +router_ip=$(ip_to_hex 172 16 1 1) +router_ip6=20020000000000000000000000000001 + +dst_mac=001122334455 +dst_ip=$(ip_to_hex 172 16 1 10) +dst_ip6=20020000000000000000000000000010 + +data=0800bee4391a0001 + +send_icmp_packet 1 1 $src_mac $router_mac0 $src_ip $dst_ip 0000 $data +send_arp_reply 2 1 $dst_mac $router_mac1 $dst_ip $router_ip +echo $(get_arp_req $router_mac1 $router_ip $dst_ip) > expected +echo "${dst_mac}${router_mac1}08004500001c00004000fe010100${src_ip}${dst_ip}${data}" >> expected + +OVN_CHECK_PACKETS([hv2/vif1-tx.pcap], [expected]) + +nd_ip=ff0200000000000000000001ff000010 +ip6_hdr=6000000000083afe${src_ip6}${dst_ip6} + +send_icmp6_packet 1 1 $src_mac $router_mac0 $src_ip6 $dst_ip6 +echo $(get_nd $router_mac1 $src_ip6 $nd_ip $dst_ip6) >> expected +echo "${dst_mac}${router_mac1}86dd${ip6_hdr}8000dcb662f00001" >> expected +send_na 2 1 $dst_mac $router_mac1 $dst_ip6 $router_ip6 + +OVN_CHECK_PACKETS([hv2/vif1-tx.pcap], [expected]) + +OVN_CLEANUP([hv1],[hv2]) +AT_CLEANUP