Skip to content

Commit

Permalink
Fix bad management of GSO data
Browse files Browse the repository at this point in the history
Prevents rare kernel panics and low performance during GSO
segmentation.

Progress on #267.
  • Loading branch information
ydahhrk committed Aug 7, 2018
1 parent e431a8d commit 2a7318d
Show file tree
Hide file tree
Showing 6 changed files with 136 additions and 275 deletions.
1 change: 0 additions & 1 deletion include/nat64/mod/common/rfc6145/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,6 @@ typedef enum addrxlat_verdict {
struct translation_steps *ttpcomm_get_steps(struct packet *in);

void partialize_skb(struct sk_buff *skb, unsigned int csum_offset);
int copy_payload(struct xlation *state);
bool will_need_frag_hdr(const struct iphdr *hdr);
verdict ttpcomm_translate_inner_packet(struct xlation *state);

Expand Down
114 changes: 63 additions & 51 deletions mod/common/rfc6145/4to6.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,19 @@
verdict ttp46_alloc_skb(struct xlation *state)
{
struct packet *in = &state->in;
size_t l3_hdr_len;
size_t total_len;
size_t reserve = LL_MAX_HEADER;
struct sk_buff *skb;
int delta;
struct sk_buff *out;
struct iphdr *hdr4_inner;
struct frag_hdr *hdr_frag = NULL;
struct skb_shared_info *shinfo;

/*
* These are my assumptions to compute total_len:
*
* The IPv4 header will be replaced by an IPv6 header and possibly a
* fragment header.
* (we will reserve room for this fragment header just in case the
* kernel wants to do something with it later.)
* The L4 header will never change in size
* (in particular, ICMPv4 hdr len == ICMPv6 hdr len).
* The payload will not change in TCP, UDP and ICMP infos.
Expand All @@ -35,50 +37,73 @@ verdict ttp46_alloc_skb(struct xlation *state)
* The IPv4 header will be replaced by an IPv6 header and possibly a
* fragment header.
* The sub-L4 header will never change in size.
* The subpayload might get truncated to maximize delivery probability.
* The subpayload will never change in size (by us).
*/
l3_hdr_len = sizeof(struct ipv6hdr);
if (will_need_frag_hdr(pkt_ip4_hdr(in))) {
l3_hdr_len += sizeof(struct frag_hdr);
} else {
/* The kernel might want to fragment this so leave room.*/
reserve += sizeof(struct frag_hdr);
}

total_len = l3_hdr_len + pkt_l3payload_len(in);
/* Calculate the "delta" - the amount the packet might grow in size. */
delta = sizeof(struct ipv6hdr) - pkt_l3hdr_len(in)
+ sizeof(struct frag_hdr);
if (is_first_frag4(pkt_ip4_hdr(in)) && pkt_is_icmp4_error(in)) {
struct iphdr *hdr4_inner = pkt_payload(in);

total_len += sizeof(struct ipv6hdr) - (hdr4_inner->ihl << 2);
hdr4_inner = pkt_payload(in);
delta += sizeof(struct ipv6hdr) - (hdr4_inner->ihl << 2);
if (will_need_frag_hdr(hdr4_inner))
total_len += sizeof(struct frag_hdr);

/* All errors from RFC 4443 share this. */
if (total_len > IPV6_MIN_MTU)
total_len = IPV6_MIN_MTU;
delta += sizeof(struct frag_hdr);
}

skb = alloc_skb(reserve + total_len, GFP_ATOMIC);
if (!skb) {
/*
* Do not shrink under any circumstances because I'm not sure what
* happens when headroom is negative.
*/
if (delta < 0)
delta = 0;

/* Allocate the outgoing packet as a copy of @in with shared pages. */
out = __pskb_copy(in->skb, delta + skb_headroom(in->skb), GFP_ATOMIC);
if (!out) {
inc_stats(in, IPSTATS_MIB_INDISCARDS);
return VERDICT_DROP;
}

skb_reserve(skb, reserve);
skb_put(skb, total_len);
skb_reset_mac_header(skb);
skb_reset_network_header(skb);
skb_set_transport_header(skb, l3_hdr_len);
/* Remove outer l3 and l4 headers from the copy. */
skb_pull(out, pkt_hdrs_len(in));

if (will_need_frag_hdr(pkt_ip4_hdr(in)))
hdr_frag = (struct frag_hdr *)(ipv6_hdr(skb) + 1);
if (is_first_frag4(pkt_ip4_hdr(in)) && pkt_is_icmp4_error(in)) {
hdr4_inner = pkt_payload(in);

/* Remove inner l3 headers from the copy. */
skb_pull(out, hdr4_inner->ihl << 2);

/* Add inner l3 headers to the copy. */
if (will_need_frag_hdr(hdr4_inner))
skb_push(out, sizeof(struct frag_hdr));
skb_push(out, sizeof(struct ipv6hdr));
}

/* Add outer l4 headers to the copy. */
skb_push(out, pkt_l4hdr_len(in));
skb_reset_transport_header(out);

pkt_fill(&state->out, skb, L3PROTO_IPV6, pkt_l4_proto(in),
hdr_frag, skb_transport_header(skb) + pkt_l4hdr_len(in),
/* Add outer l3 headers to the copy. */
if (will_need_frag_hdr(pkt_ip4_hdr(in)))
hdr_frag = (struct frag_hdr *)skb_push(out, sizeof(struct frag_hdr));
skb_push(out, sizeof(struct ipv6hdr));
skb_reset_network_header(out);
skb_reset_mac_header(out);

/* Wrap up. */
pkt_fill(&state->out, out, L3PROTO_IPV6, pkt_l4_proto(in),
hdr_frag, skb_transport_header(out) + pkt_l4hdr_len(in),
pkt_original_pkt(in));

skb->mark = in->skb->mark;
skb->protocol = htons(ETH_P_IPV6);
memset(out->cb, 0, sizeof(out->cb));
out->mark = in->skb->mark;
out->protocol = htons(ETH_P_IPV6);

shinfo = skb_shinfo(out);
if (shinfo->gso_type & SKB_GSO_TCPV4) {
shinfo->gso_type &= ~SKB_GSO_TCPV4;
shinfo->gso_type |= SKB_GSO_TCPV6;
}

return VERDICT_CONTINUE;
}
Expand Down Expand Up @@ -635,17 +660,6 @@ static verdict validate_icmp4_csum(struct packet *in)
return VERDICT_CONTINUE;
}

/*
 * Wraps up the translation of an ICMPv6 info message: runs copy_payload()
 * and, only if that reported no error, update_icmp6_csum().
 *
 * Returns the nonzero code from copy_payload() when the copy fails; otherwise
 * returns whatever update_icmp6_csum() yields.
 */
static int post_icmp6info(struct xlation *state)
{
	int payload_error = copy_payload(state);

	return payload_error ? payload_error : update_icmp6_csum(state);
}

static verdict post_icmp6error(struct xlation *state)
{
verdict result;
Expand Down Expand Up @@ -689,7 +703,7 @@ verdict ttp46_icmp(struct xlation *state)
? cpu_to_be16(state->out.tuple.icmp6_id)
: icmpv4_hdr->un.echo.id;
icmpv6_hdr->icmp6_sequence = icmpv4_hdr->un.echo.sequence;
error = post_icmp6info(state);
error = update_icmp6_csum(state);
break;

case ICMP_ECHOREPLY:
Expand All @@ -699,7 +713,7 @@ verdict ttp46_icmp(struct xlation *state)
? cpu_to_be16(state->out.tuple.icmp6_id)
: icmpv4_hdr->un.echo.id;
icmpv6_hdr->icmp6_sequence = icmpv4_hdr->un.echo.sequence;
error = post_icmp6info(state);
error = update_icmp6_csum(state);
break;

case ICMP_DEST_UNREACH:
Expand Down Expand Up @@ -888,8 +902,7 @@ verdict ttp46_tcp(struct xlation *state)
partialize_skb(out->skb, offsetof(struct tcphdr, check));
}

/* Payload */
return copy_payload(state) ? VERDICT_DROP : VERDICT_CONTINUE;
return VERDICT_CONTINUE;
}

verdict ttp46_udp(struct xlation *state)
Expand Down Expand Up @@ -932,6 +945,5 @@ verdict ttp46_udp(struct xlation *state)
return VERDICT_DROP;
}

/* Payload */
return copy_payload(state) ? VERDICT_DROP : VERDICT_CONTINUE;
return VERDICT_CONTINUE;
}
111 changes: 59 additions & 52 deletions mod/common/rfc6145/6to4.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,58 +17,78 @@
verdict ttp64_alloc_skb(struct xlation *state)
{
struct packet *in = &state->in;
size_t total_len;
struct sk_buff *skb;
struct sk_buff *out;
struct skb_shared_info *shinfo;

/*
* These are my assumptions to compute total_len:
* Note: For the purposes of this comment, remember that the reserved
* area of a packet (bytes between head and data) is called "headroom"
* (example: skb_headroom()), while the non-paged active area (bytes
* between data and tail) is called "head" (eg: skb_headlen()). This is
* a kernel quirk; don't blame me for it.
*
* Any L3 headers will be replaced by an IPv4 header.
* The L4 header will never change in size
* (in particular, ICMPv4 hdr len == ICMPv6 hdr len).
* The payload will not change in TCP, UDP and ICMP infos.
* I'm going to use __pskb_copy() (via pskb_copy()) because I need the
* incoming and outgoing packets to share the same paged data. This is
* not only for the sake of performance (prevents lots of data copying
* and large contiguous skbs in memory) but also because the pages need
* to survive the translation for GSO to work.
*
* As for ICMP errors:
* Any sub-L3 headers will be replaced by an IPv4 header.
* The sub-L4 header will never change in size.
* The subpayload might get truncated to maximize delivery probability.
* Since the IPv4 version of the packet is going to be invariably
* smaller than its IPv6 counterpart, you'd think we should reserve less
* memory for it. But there's a problem: __pskb_copy() only allows us to
* shrink the headroom; not the head. If we try to shrink the head
* through the headroom and the v6 packet happens to have one too many
* extension headers, the `headroom` we'll send to __pskb_copy() will be
* negative, and then skb_copy_from_linear_data() will write onto the
* tail area without knowing it. (I'm reading the Linux 4.4 code.)
*
* We will therefore *not* attempt to allocate less.
*/
total_len = sizeof(struct iphdr) + pkt_l3payload_len(in);

out = pskb_copy(in->skb, GFP_ATOMIC);
if (!out) {
inc_stats(in, IPSTATS_MIB_INDISCARDS);
return VERDICT_DROP;
}

/* Remove outer l3 and l4 headers from the copy. */
skb_pull(out, pkt_hdrs_len(in));

if (is_first_frag6(pkt_frag_hdr(in)) && pkt_is_icmp6_error(in)) {
struct ipv6hdr *hdr = pkt_payload(in);
struct hdr_iterator iterator = HDR_ITERATOR_INIT(hdr);
hdr_iterator_last(&iterator);

/* Add the IPv4 subheader, remove the IPv6 subheaders. */
total_len += sizeof(struct iphdr) - (iterator.data
- pkt_payload(in));
/* Remove inner l3 headers from the copy. */
skb_pull(out, iterator.data - (void *)hdr);

/*
* RFC1812 section 4.3.2.3.
* I'm using a literal because the RFC does.
*/
if (total_len > 576)
total_len = 576;
/* Add inner l3 headers to the copy. */
skb_push(out, sizeof(struct iphdr));
}

skb = alloc_skb(LL_MAX_HEADER + total_len, GFP_ATOMIC);
if (!skb) {
inc_stats(in, IPSTATS_MIB_INDISCARDS);
return VERDICT_DROP;
}
/* Add outer l4 headers to the copy. */
skb_push(out, pkt_l4hdr_len(in));
skb_reset_transport_header(out);

skb_reserve(skb, LL_MAX_HEADER);
skb_put(skb, total_len);
skb_reset_mac_header(skb);
skb_reset_network_header(skb);
skb_set_transport_header(skb, sizeof(struct iphdr));
/* Add outer l3 headers to the copy. */
skb_push(out, sizeof(struct iphdr));
skb_reset_network_header(out);
skb_reset_mac_header(out);

pkt_fill(&state->out, skb, L3PROTO_IPV4, pkt_l4_proto(in),
NULL, skb_transport_header(skb) + pkt_l4hdr_len(in),
/* Wrap up. */
pkt_fill(&state->out, out, L3PROTO_IPV4, pkt_l4_proto(in),
NULL, skb_transport_header(out) + pkt_l4hdr_len(in),
pkt_original_pkt(in));

skb->mark = in->skb->mark;
skb->protocol = htons(ETH_P_IP);
memset(out->cb, 0, sizeof(out->cb));
out->mark = in->skb->mark;
out->protocol = htons(ETH_P_IP);

shinfo = skb_shinfo(out);
if (shinfo->gso_type & SKB_GSO_TCPV6) {
shinfo->gso_type &= ~SKB_GSO_TCPV6;
shinfo->gso_type |= SKB_GSO_TCPV4;
}

return VERDICT_CONTINUE;
}
Expand Down Expand Up @@ -630,17 +650,6 @@ static verdict validate_icmp6_csum(struct packet *in)
return VERDICT_CONTINUE;
}

/*
 * Wraps up the translation of an ICMPv4 info message: runs copy_payload()
 * and, only if that reported no error, update_icmp4_csum().
 *
 * Returns the nonzero code from copy_payload() when the copy fails; otherwise
 * returns whatever update_icmp4_csum() yields.
 */
static int post_icmp4info(struct xlation *state)
{
	int payload_error = copy_payload(state);

	return payload_error ? payload_error : update_icmp4_csum(state);
}

static verdict post_icmp4error(struct xlation *state)
{
verdict result;
Expand Down Expand Up @@ -679,7 +688,7 @@ verdict ttp64_icmp(struct xlation *state)
? cpu_to_be16(state->out.tuple.icmp4_id)
: icmpv6_hdr->icmp6_identifier;
icmpv4_hdr->un.echo.sequence = icmpv6_hdr->icmp6_sequence;
error = post_icmp4info(state);
error = update_icmp4_csum(state);
break;

case ICMPV6_ECHO_REPLY:
Expand All @@ -689,7 +698,7 @@ verdict ttp64_icmp(struct xlation *state)
? cpu_to_be16(state->out.tuple.icmp4_id)
: icmpv6_hdr->icmp6_identifier;
icmpv4_hdr->un.echo.sequence = icmpv6_hdr->icmp6_sequence;
error = post_icmp4info(state);
error = update_icmp4_csum(state);
break;

case ICMPV6_DEST_UNREACH:
Expand Down Expand Up @@ -821,8 +830,7 @@ verdict ttp64_tcp(struct xlation *state)
partialize_skb(out->skb, offsetof(struct tcphdr, check));
}

/* Payload */
return copy_payload(state) ? VERDICT_DROP : VERDICT_CONTINUE;
return VERDICT_CONTINUE;
}

verdict ttp64_udp(struct xlation *state)
Expand Down Expand Up @@ -858,6 +866,5 @@ verdict ttp64_udp(struct xlation *state)
partialize_skb(out->skb, offsetof(struct udphdr, check));
}

/* Payload */
return copy_payload(state) ? VERDICT_DROP : VERDICT_CONTINUE;
return VERDICT_CONTINUE;
}
21 changes: 1 addition & 20 deletions mod/common/rfc6145/common.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ struct backup_skb {

static verdict handle_unknown_l4(struct xlation *state)
{
return copy_payload(state) ? VERDICT_DROP : VERDICT_CONTINUE;
return VERDICT_CONTINUE;
}

static struct translation_steps steps[][L4_PROTO_COUNT] = {
Expand Down Expand Up @@ -71,25 +71,6 @@ static struct translation_steps steps[][L4_PROTO_COUNT] = {
}
};

/*
 * Copies payload bytes from the incoming packet (state->in) into the payload
 * area of the outgoing packet (state->out) by means of skb_copy_bits().
 *
 * The source offset comes from the incoming packet, while the number of bytes
 * to copy comes from the outgoing packet.
 *
 * Returns the value reported by skb_copy_bits(). A nonzero value is also
 * logged through log_debug() before being handed back to the caller.
 */
int copy_payload(struct xlation *state)
{
int error;

error = skb_copy_bits(state->in.skb, pkt_payload_offset(&state->in),
pkt_payload(&state->out),
/*
* Note: There's an important reason why the payload
* length must be extracted from the outgoing packet:
* the outgoing packet might be shorter than the
* incoming one, because the allocation functions clamp
* the length of translated ICMP errors. See
* ttp46_alloc_skb() and ttp64_alloc_skb().
*/
pkt_payload_len_frag(&state->out));
if (error)
log_debug("The payload copy threw errcode %d.", error);

return error;
}

bool will_need_frag_hdr(const struct iphdr *hdr)
{
return is_mf_set_ipv4(hdr) || get_fragment_offset_ipv4(hdr);
Expand Down
Loading

0 comments on commit 2a7318d

Please sign in to comment.