@@ -1,11 +1,14 @@
/* IP tables module for matching the value of the IPv4/IPv6 DSCP field
/* x_tables module for setting the IPv4/IPv6 DSCP field, Version 1.8
*
* (C) 2002 by Harald Welte <laforge@netfilter.org>
* based on ipt_FTOS.c (C) 2000 by Matthew G. Marsh <mgm@paktronix.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
*
* See RFC2474 for a description of the DSCP field within the IP Header.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/skbuff.h>
@@ -14,102 +17,148 @@
#include <net/dsfield.h>

#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_dscp.h>
#include <linux/netfilter/xt_DSCP.h>

MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
MODULE_DESCRIPTION("Xtables: DSCP/TOS field match");
MODULE_DESCRIPTION("Xtables: DSCP/TOS field modification");
MODULE_LICENSE("GPL");
MODULE_ALIAS("ipt_dscp");
MODULE_ALIAS("ip6t_dscp");
MODULE_ALIAS("ipt_tos");
MODULE_ALIAS("ip6t_tos");
MODULE_ALIAS("ipt_DSCP");
MODULE_ALIAS("ip6t_DSCP");
MODULE_ALIAS("ipt_TOS");
MODULE_ALIAS("ip6t_TOS");

/*
 * DSCP handling for IPv4.
 *
 * NOTE(review): this region carries two signatures (dscp_mt and dscp_tg)
 * and statements from both a match and a target body: one path reads
 * par->matchinfo and returns a boolean comparison, the other reads
 * par->targinfo and rewrites the header. It looks like two revisions were
 * interleaved -- confirm which one is intended before building.
 *
 * Target path as written: the current DSCP is ipv4_get_dsfield() shifted
 * right by XT_DSCP_SHIFT. If it differs from dinfo->dscp, the IPv4 header is
 * made writable (NF_DROP on failure) and the DSCP bits are replaced with
 * dinfo->dscp << XT_DSCP_SHIFT. The function then returns XT_CONTINUE.
 */
static bool
dscp_mt(const struct sk_buff *skb, struct xt_action_param *par)
static unsigned int
dscp_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_dscp_info *info = par->matchinfo;
const struct xt_DSCP_info *dinfo = par->targinfo;
u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT;

/* Match-style result: equality with info->dscp, flipped when invert is set. */
return (dscp == info->dscp) ^ !!info->invert;
if (dscp != dinfo->dscp) {
if (!skb_make_writable(skb, sizeof(struct iphdr)))
return NF_DROP;

/* ip_hdr() is evaluated again here, after skb_make_writable(). */
ipv4_change_dsfield(ip_hdr(skb), (__u8)(~XT_DSCP_MASK),
dinfo->dscp << XT_DSCP_SHIFT);

}
return XT_CONTINUE;
}

/*
 * DSCP handling for IPv6.
 *
 * NOTE(review): as with the IPv4 variant, two signatures (dscp_mt6 and
 * dscp_tg6) and statements from both a match body (par->matchinfo, boolean
 * return) and a target body (par->targinfo, header rewrite) are present.
 * Confirm which revision is intended.
 *
 * Target path as written: the current DSCP is ipv6_get_dsfield() shifted
 * right by XT_DSCP_SHIFT. If it differs from dinfo->dscp, the IPv6 header is
 * made writable (NF_DROP on failure) and the DSCP bits are replaced with
 * dinfo->dscp << XT_DSCP_SHIFT. The function then returns XT_CONTINUE.
 */
static bool
dscp_mt6(const struct sk_buff *skb, struct xt_action_param *par)
static unsigned int
dscp_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_dscp_info *info = par->matchinfo;
const struct xt_DSCP_info *dinfo = par->targinfo;
u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT;

/* Match-style result: equality with info->dscp, flipped when invert is set. */
return (dscp == info->dscp) ^ !!info->invert;
if (dscp != dinfo->dscp) {
if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
return NF_DROP;

/* ipv6_hdr() is evaluated again here, after skb_make_writable(). */
ipv6_change_dsfield(ipv6_hdr(skb), (__u8)(~XT_DSCP_MASK),
dinfo->dscp << XT_DSCP_SHIFT);
}
return XT_CONTINUE;
}

/*
 * Rule-insertion check for the DSCP value.
 *
 * Rejects the rule with -EDOM (and logs the offending value in hex) when
 * info->dscp is greater than XT_DSCP_MAX; returns 0 otherwise.
 *
 * NOTE(review): two signatures (dscp_mt_check / dscp_tg_check) and two
 * declarations of `info` (from par->matchinfo and par->targinfo) are
 * present; this looks like two revisions interleaved -- confirm which one
 * is intended.
 */
static int dscp_mt_check(const struct xt_mtchk_param *par)
static int dscp_tg_check(const struct xt_tgchk_param *par)
{
const struct xt_dscp_info *info = par->matchinfo;
const struct xt_DSCP_info *info = par->targinfo;

if (info->dscp > XT_DSCP_MAX) {
pr_info("dscp %x out of range\n", info->dscp);
return -EDOM;
}

return 0;
}

static bool tos_mt(const struct sk_buff *skb, struct xt_action_param *par)
/*
 * TOS target for IPv4.
 *
 * Computes the new DS/TOS byte by clearing the bits selected by
 * info->tos_mask and XOR-ing in info->tos_value. The header is only touched
 * when the result differs from the current value.
 *
 * Returns NF_DROP if the IPv4 header cannot be made writable, otherwise
 * XT_CONTINUE.
 */
static unsigned int
tos_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
	const struct xt_tos_target_info *tinfo = par->targinfo;
	u_int8_t cur = ipv4_get_dsfield(ip_hdr(skb));
	u_int8_t want = (cur & ~tinfo->tos_mask) ^ tinfo->tos_value;

	/* Nothing to rewrite: leave the skb alone. */
	if (cur == want)
		return XT_CONTINUE;

	if (!skb_make_writable(skb, sizeof(struct iphdr)))
		return NF_DROP;

	/* Fetch the header pointer again after skb_make_writable(). */
	ipv4_change_dsfield(ip_hdr(skb), 0, want);

	return XT_CONTINUE;
}

static unsigned int
tos_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_tos_match_info *info = par->matchinfo;

if (par->family == NFPROTO_IPV4)
return ((ip_hdr(skb)->tos & info->tos_mask) ==
info->tos_value) ^ !!info->invert;
else
return ((ipv6_get_dsfield(ipv6_hdr(skb)) & info->tos_mask) ==
info->tos_value) ^ !!info->invert;
const struct xt_tos_target_info *info = par->targinfo;
struct ipv6hdr *iph = ipv6_hdr(skb);
u_int8_t orig, nv;

orig = ipv6_get_dsfield(iph);
nv = (orig & ~info->tos_mask) ^ info->tos_value;

if (orig != nv) {
if (!skb_make_writable(skb, sizeof(struct iphdr)))
return NF_DROP;
iph = ipv6_hdr(skb);
ipv6_change_dsfield(iph, 0, nv);
}

return XT_CONTINUE;
}

static struct xt_match dscp_mt_reg[] __read_mostly = {
static struct xt_target dscp_tg_reg[] __read_mostly = {
{
.name = "dscp",
.name = "DSCP",
.family = NFPROTO_IPV4,
.checkentry = dscp_mt_check,
.match = dscp_mt,
.matchsize = sizeof(struct xt_dscp_info),
.checkentry = dscp_tg_check,
.target = dscp_tg,
.targetsize = sizeof(struct xt_DSCP_info),
.table = "mangle",
.me = THIS_MODULE,
},
{
.name = "dscp",
.name = "DSCP",
.family = NFPROTO_IPV6,
.checkentry = dscp_mt_check,
.match = dscp_mt6,
.matchsize = sizeof(struct xt_dscp_info),
.checkentry = dscp_tg_check,
.target = dscp_tg6,
.targetsize = sizeof(struct xt_DSCP_info),
.table = "mangle",
.me = THIS_MODULE,
},
{
.name = "tos",
.name = "TOS",
.revision = 1,
.family = NFPROTO_IPV4,
.match = tos_mt,
.matchsize = sizeof(struct xt_tos_match_info),
.table = "mangle",
.target = tos_tg,
.targetsize = sizeof(struct xt_tos_target_info),
.me = THIS_MODULE,
},
{
.name = "tos",
.name = "TOS",
.revision = 1,
.family = NFPROTO_IPV6,
.match = tos_mt,
.matchsize = sizeof(struct xt_tos_match_info),
.table = "mangle",
.target = tos_tg6,
.targetsize = sizeof(struct xt_tos_target_info),
.me = THIS_MODULE,
},
};

static int __init dscp_mt_init(void)
static int __init dscp_tg_init(void)
{
return xt_register_matches(dscp_mt_reg, ARRAY_SIZE(dscp_mt_reg));
return xt_register_targets(dscp_tg_reg, ARRAY_SIZE(dscp_tg_reg));
}

static void __exit dscp_mt_exit(void)
static void __exit dscp_tg_exit(void)
{
xt_unregister_matches(dscp_mt_reg, ARRAY_SIZE(dscp_mt_reg));
xt_unregister_targets(dscp_tg_reg, ARRAY_SIZE(dscp_tg_reg));
}

module_init(dscp_mt_init);
module_exit(dscp_mt_exit);
module_init(dscp_tg_init);
module_exit(dscp_tg_exit);
@@ -1,96 +1,169 @@
/*
* IP tables module for matching the value of the TTL
* (C) 2000,2001 by Harald Welte <laforge@netfilter.org>
* TTL modification target for IP tables
* (C) 2000,2005 by Harald Welte <laforge@netfilter.org>
*
* Hop Limit matching module
* (C) 2001-2002 Maciej Soltysiak <solt@dns.toxicfilms.tv>
* Hop Limit modification target for ip6tables
* Maciej Soltysiak <solt@dns.toxicfilms.tv>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/

#include <linux/ip.h>
#include <linux/ipv6.h>
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <net/checksum.h>

#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ipt_ttl.h>
#include <linux/netfilter_ipv6/ip6t_hl.h>
#include <linux/netfilter_ipv4/ipt_TTL.h>
#include <linux/netfilter_ipv6/ip6t_HL.h>

MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>");
MODULE_DESCRIPTION("Xtables: Hoplimit/TTL field match");
MODULE_DESCRIPTION("Xtables: Hoplimit/TTL Limit field modification target");
MODULE_LICENSE("GPL");
MODULE_ALIAS("ipt_ttl");
MODULE_ALIAS("ip6t_hl");

static bool ttl_mt(const struct sk_buff *skb, struct xt_action_param *par)
static unsigned int
ttl_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct ipt_ttl_info *info = par->matchinfo;
const u8 ttl = ip_hdr(skb)->ttl;
struct iphdr *iph;
const struct ipt_TTL_info *info = par->targinfo;
int new_ttl;

if (!skb_make_writable(skb, skb->len))
return NF_DROP;

iph = ip_hdr(skb);

switch (info->mode) {
case IPT_TTL_EQ:
return ttl == info->ttl;
case IPT_TTL_NE:
return ttl != info->ttl;
case IPT_TTL_LT:
return ttl < info->ttl;
case IPT_TTL_GT:
return ttl > info->ttl;
case IPT_TTL_SET:
new_ttl = info->ttl;
break;
case IPT_TTL_INC:
new_ttl = iph->ttl + info->ttl;
if (new_ttl > 255)
new_ttl = 255;
break;
case IPT_TTL_DEC:
new_ttl = iph->ttl - info->ttl;
if (new_ttl < 0)
new_ttl = 0;
break;
default:
new_ttl = iph->ttl;
break;
}

if (new_ttl != iph->ttl) {
csum_replace2(&iph->check, htons(iph->ttl << 8),
htons(new_ttl << 8));
iph->ttl = new_ttl;
}

return false;
return XT_CONTINUE;
}

static bool hl_mt6(const struct sk_buff *skb, struct xt_action_param *par)
static unsigned int
hl_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct ip6t_hl_info *info = par->matchinfo;
const struct ipv6hdr *ip6h = ipv6_hdr(skb);
struct ipv6hdr *ip6h;
const struct ip6t_HL_info *info = par->targinfo;
int new_hl;

if (!skb_make_writable(skb, skb->len))
return NF_DROP;

ip6h = ipv6_hdr(skb);

switch (info->mode) {
case IP6T_HL_EQ:
return ip6h->hop_limit == info->hop_limit;
case IP6T_HL_NE:
return ip6h->hop_limit != info->hop_limit;
case IP6T_HL_LT:
return ip6h->hop_limit < info->hop_limit;
case IP6T_HL_GT:
return ip6h->hop_limit > info->hop_limit;
case IP6T_HL_SET:
new_hl = info->hop_limit;
break;
case IP6T_HL_INC:
new_hl = ip6h->hop_limit + info->hop_limit;
if (new_hl > 255)
new_hl = 255;
break;
case IP6T_HL_DEC:
new_hl = ip6h->hop_limit - info->hop_limit;
if (new_hl < 0)
new_hl = 0;
break;
default:
new_hl = ip6h->hop_limit;
break;
}

return false;
ip6h->hop_limit = new_hl;

return XT_CONTINUE;
}

/*
 * Rule-insertion check for the IPv4 TTL target.
 *
 * Returns -EINVAL when the mode is above IPT_TTL_MAXMODE, or when a mode
 * other than IPT_TTL_SET is combined with a TTL operand of 0. Returns 0
 * otherwise. Both rejections are logged so the user can tell them apart,
 * matching what hl_tg6_check() does for the IPv6 HL target.
 */
static int ttl_tg_check(const struct xt_tgchk_param *par)
{
	const struct ipt_TTL_info *info = par->targinfo;

	if (info->mode > IPT_TTL_MAXMODE) {
		pr_info("TTL: invalid or unknown mode %u\n", info->mode);
		return -EINVAL;
	}
	if (info->mode != IPT_TTL_SET && info->ttl == 0) {
		/* Adding or subtracting 0 would make the rule a no-op. */
		pr_info("TTL: increment/decrement does not make sense with value 0\n");
		return -EINVAL;
	}
	return 0;
}

/*
 * Rule-insertion check for the IPv6 hop-limit target.
 *
 * Returns -EINVAL (with a log message) when the mode is above
 * IP6T_HL_MAXMODE, or when a mode other than IP6T_HL_SET is combined with a
 * hop_limit operand of 0. Returns 0 otherwise.
 */
static int hl_tg6_check(const struct xt_tgchk_param *par)
{
	const struct ip6t_HL_info *hl = par->targinfo;

	if (hl->mode > IP6T_HL_MAXMODE) {
		pr_info("invalid or unknown mode %u\n", hl->mode);
		return -EINVAL;
	}

	/* SET accepts any value; the other modes need a non-zero operand. */
	if (hl->mode == IP6T_HL_SET || hl->hop_limit != 0)
		return 0;

	pr_info("increment/decrement does not make sense with value 0\n");
	return -EINVAL;
}

static struct xt_match hl_mt_reg[] __read_mostly = {
static struct xt_target hl_tg_reg[] __read_mostly = {
{
.name = "ttl",
.name = "TTL",
.revision = 0,
.family = NFPROTO_IPV4,
.match = ttl_mt,
.matchsize = sizeof(struct ipt_ttl_info),
.target = ttl_tg,
.targetsize = sizeof(struct ipt_TTL_info),
.table = "mangle",
.checkentry = ttl_tg_check,
.me = THIS_MODULE,
},
{
.name = "hl",
.name = "HL",
.revision = 0,
.family = NFPROTO_IPV6,
.match = hl_mt6,
.matchsize = sizeof(struct ip6t_hl_info),
.target = hl_tg6,
.targetsize = sizeof(struct ip6t_HL_info),
.table = "mangle",
.checkentry = hl_tg6_check,
.me = THIS_MODULE,
},
};

static int __init hl_mt_init(void)
static int __init hl_tg_init(void)
{
return xt_register_matches(hl_mt_reg, ARRAY_SIZE(hl_mt_reg));
return xt_register_targets(hl_tg_reg, ARRAY_SIZE(hl_tg_reg));
}

static void __exit hl_mt_exit(void)
static void __exit hl_tg_exit(void)
{
xt_unregister_matches(hl_mt_reg, ARRAY_SIZE(hl_mt_reg));
xt_unregister_targets(hl_tg_reg, ARRAY_SIZE(hl_tg_reg));
}

module_init(hl_mt_init);
module_exit(hl_mt_exit);
module_init(hl_tg_init);
module_exit(hl_tg_exit);
MODULE_ALIAS("ipt_TTL");
MODULE_ALIAS("ip6t_HL");
@@ -8,150 +8,188 @@
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/gen_stats.h>
#include <linux/jhash.h>
#include <linux/rtnetlink.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <net/gen_stats.h>
#include <net/netlink.h>

#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_rateest.h>
#include <linux/netfilter/xt_RATEEST.h>
#include <net/netfilter/xt_rateest.h>

static DEFINE_MUTEX(xt_rateest_mutex);

static bool
xt_rateest_mt(const struct sk_buff *skb, struct xt_action_param *par)
#define RATEEST_HSIZE 16
static struct hlist_head rateest_hash[RATEEST_HSIZE] __read_mostly;
static unsigned int jhash_rnd __read_mostly;
static bool rnd_inited __read_mostly;

static unsigned int xt_rateest_hash(const char *name)
{
const struct xt_rateest_match_info *info = par->matchinfo;
struct gnet_stats_rate_est *r;
u_int32_t bps1, bps2, pps1, pps2;
bool ret = true;

spin_lock_bh(&info->est1->lock);
r = &info->est1->rstats;
if (info->flags & XT_RATEEST_MATCH_DELTA) {
bps1 = info->bps1 >= r->bps ? info->bps1 - r->bps : 0;
pps1 = info->pps1 >= r->pps ? info->pps1 - r->pps : 0;
} else {
bps1 = r->bps;
pps1 = r->pps;
}
spin_unlock_bh(&info->est1->lock);

if (info->flags & XT_RATEEST_MATCH_ABS) {
bps2 = info->bps2;
pps2 = info->pps2;
} else {
spin_lock_bh(&info->est2->lock);
r = &info->est2->rstats;
if (info->flags & XT_RATEEST_MATCH_DELTA) {
bps2 = info->bps2 >= r->bps ? info->bps2 - r->bps : 0;
pps2 = info->pps2 >= r->pps ? info->pps2 - r->pps : 0;
} else {
bps2 = r->bps;
pps2 = r->pps;
return jhash(name, FIELD_SIZEOF(struct xt_rateest, name), jhash_rnd) &
(RATEEST_HSIZE - 1);
}

/*
 * Add @est at the head of the rateest_hash bucket selected by hashing
 * est->name with xt_rateest_hash().
 */
static void xt_rateest_hash_insert(struct xt_rateest *est)
{
	struct hlist_head *bucket = &rateest_hash[xt_rateest_hash(est->name)];

	hlist_add_head(&est->list, bucket);
}

/*
 * Look up a rate estimator by name in rateest_hash.
 *
 * The bucket is selected with xt_rateest_hash(name) and walked while holding
 * xt_rateest_mutex. On a strcmp() match the entry's refcnt is incremented
 * and the entry is returned; if no entry matches, NULL is returned. The
 * mutex is released on both paths.
 */
struct xt_rateest *xt_rateest_lookup(const char *name)
{
struct xt_rateest *est;
struct hlist_node *n;
unsigned int h;

h = xt_rateest_hash(name);
mutex_lock(&xt_rateest_mutex);
hlist_for_each_entry(est, n, &rateest_hash[h], list) {
if (strcmp(est->name, name) == 0) {
est->refcnt++;
mutex_unlock(&xt_rateest_mutex);
return est;
}
/*
 * NOTE(review): `info` is not declared in this function and no spinlock is
 * taken here; this unlock looks like a stray line from another revision --
 * confirm and remove.
 */
spin_unlock_bh(&info->est2->lock);
}
mutex_unlock(&xt_rateest_mutex);
return NULL;
}
EXPORT_SYMBOL_GPL(xt_rateest_lookup);

switch (info->mode) {
case XT_RATEEST_MATCH_LT:
if (info->flags & XT_RATEEST_MATCH_BPS)
ret &= bps1 < bps2;
if (info->flags & XT_RATEEST_MATCH_PPS)
ret &= pps1 < pps2;
break;
case XT_RATEEST_MATCH_GT:
if (info->flags & XT_RATEEST_MATCH_BPS)
ret &= bps1 > bps2;
if (info->flags & XT_RATEEST_MATCH_PPS)
ret &= pps1 > pps2;
break;
case XT_RATEEST_MATCH_EQ:
if (info->flags & XT_RATEEST_MATCH_BPS)
ret &= bps1 == bps2;
if (info->flags & XT_RATEEST_MATCH_PPS)
ret &= pps1 == pps2;
break;
void xt_rateest_put(struct xt_rateest *est)
{
mutex_lock(&xt_rateest_mutex);
if (--est->refcnt == 0) {
hlist_del(&est->list);
gen_kill_estimator(&est->bstats, &est->rstats);
/*
* gen_estimator est_timer() might access est->lock or bstats,
* wait a RCU grace period before freeing 'est'
*/
kfree_rcu(est, rcu);
}

ret ^= info->flags & XT_RATEEST_MATCH_INVERT ? true : false;
return ret;
mutex_unlock(&xt_rateest_mutex);
}
EXPORT_SYMBOL_GPL(xt_rateest_put);

static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par)
static unsigned int
xt_rateest_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
struct xt_rateest_match_info *info = par->matchinfo;
struct xt_rateest *est1, *est2;
int ret = -EINVAL;
const struct xt_rateest_target_info *info = par->targinfo;
struct gnet_stats_basic_packed *stats = &info->est->bstats;

if (hweight32(info->flags & (XT_RATEEST_MATCH_ABS |
XT_RATEEST_MATCH_REL)) != 1)
goto err1;
spin_lock_bh(&info->est->lock);
stats->bytes += skb->len;
stats->packets++;
spin_unlock_bh(&info->est->lock);

if (!(info->flags & (XT_RATEEST_MATCH_BPS | XT_RATEEST_MATCH_PPS)))
goto err1;
return XT_CONTINUE;
}

switch (info->mode) {
case XT_RATEEST_MATCH_EQ:
case XT_RATEEST_MATCH_LT:
case XT_RATEEST_MATCH_GT:
break;
default:
goto err1;
static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
{
struct xt_rateest_target_info *info = par->targinfo;
struct xt_rateest *est;
struct {
struct nlattr opt;
struct gnet_estimator est;
} cfg;
int ret;

if (unlikely(!rnd_inited)) {
get_random_bytes(&jhash_rnd, sizeof(jhash_rnd));
rnd_inited = true;
}

est = xt_rateest_lookup(info->name);
if (est) {
/*
* If estimator parameters are specified, they must match the
* existing estimator.
*/
if ((!info->interval && !info->ewma_log) ||
(info->interval != est->params.interval ||
info->ewma_log != est->params.ewma_log)) {
xt_rateest_put(est);
return -EINVAL;
}
info->est = est;
return 0;
}

ret = -ENOENT;
est1 = xt_rateest_lookup(info->name1);
if (!est1)
ret = -ENOMEM;
est = kzalloc(sizeof(*est), GFP_KERNEL);
if (!est)
goto err1;

est2 = NULL;
if (info->flags & XT_RATEEST_MATCH_REL) {
est2 = xt_rateest_lookup(info->name2);
if (!est2)
goto err2;
}
strlcpy(est->name, info->name, sizeof(est->name));
spin_lock_init(&est->lock);
est->refcnt = 1;
est->params.interval = info->interval;
est->params.ewma_log = info->ewma_log;

cfg.opt.nla_len = nla_attr_size(sizeof(cfg.est));
cfg.opt.nla_type = TCA_STATS_RATE_EST;
cfg.est.interval = info->interval;
cfg.est.ewma_log = info->ewma_log;

ret = gen_new_estimator(&est->bstats, &est->rstats,
&est->lock, &cfg.opt);
if (ret < 0)
goto err2;

info->est1 = est1;
info->est2 = est2;
info->est = est;
xt_rateest_hash_insert(est);
return 0;

err2:
xt_rateest_put(est1);
kfree(est);
err1:
return ret;
}

static void xt_rateest_mt_destroy(const struct xt_mtdtor_param *par)
static void xt_rateest_tg_destroy(const struct xt_tgdtor_param *par)
{
struct xt_rateest_match_info *info = par->matchinfo;
struct xt_rateest_target_info *info = par->targinfo;

xt_rateest_put(info->est1);
if (info->est2)
xt_rateest_put(info->est2);
xt_rateest_put(info->est);
}

static struct xt_match xt_rateest_mt_reg __read_mostly = {
.name = "rateest",
static struct xt_target xt_rateest_tg_reg __read_mostly = {
.name = "RATEEST",
.revision = 0,
.family = NFPROTO_UNSPEC,
.match = xt_rateest_mt,
.checkentry = xt_rateest_mt_checkentry,
.destroy = xt_rateest_mt_destroy,
.matchsize = sizeof(struct xt_rateest_match_info),
.target = xt_rateest_tg,
.checkentry = xt_rateest_tg_checkentry,
.destroy = xt_rateest_tg_destroy,
.targetsize = sizeof(struct xt_rateest_target_info),
.me = THIS_MODULE,
};

static int __init xt_rateest_mt_init(void)
static int __init xt_rateest_tg_init(void)
{
return xt_register_match(&xt_rateest_mt_reg);
unsigned int i;

for (i = 0; i < ARRAY_SIZE(rateest_hash); i++)
INIT_HLIST_HEAD(&rateest_hash[i]);

return xt_register_target(&xt_rateest_tg_reg);
}

static void __exit xt_rateest_mt_fini(void)
static void __exit xt_rateest_tg_fini(void)
{
xt_unregister_match(&xt_rateest_mt_reg);
xt_unregister_target(&xt_rateest_tg_reg);
}


MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("xtables rate estimator match");
MODULE_ALIAS("ipt_rateest");
MODULE_ALIAS("ip6t_rateest");
module_init(xt_rateest_mt_init);
module_exit(xt_rateest_mt_fini);
MODULE_DESCRIPTION("Xtables: packet rate estimator");
MODULE_ALIAS("ipt_RATEEST");
MODULE_ALIAS("ip6t_RATEEST");
module_init(xt_rateest_tg_init);
module_exit(xt_rateest_tg_fini);
@@ -1,110 +1,320 @@
/* Kernel module to match TCP MSS values. */

/* Copyright (C) 2000 Marc Boucher <marc@mbsi.ca>
* Portions (C) 2005 by Harald Welte <laforge@netfilter.org>
/*
* This is a module which is used for setting the MSS option in TCP packets.
*
* Copyright (C) 2000 Marc Boucher <marc@mbsi.ca>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/ip.h>
#include <linux/gfp.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <net/dst.h>
#include <net/flow.h>
#include <net/ipv6.h>
#include <net/route.h>
#include <net/tcp.h>

#include <linux/netfilter/xt_tcpmss.h>
#include <linux/netfilter/x_tables.h>

#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_tcpudp.h>
#include <linux/netfilter/xt_TCPMSS.h>

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
MODULE_DESCRIPTION("Xtables: TCP MSS match");
MODULE_ALIAS("ipt_tcpmss");
MODULE_ALIAS("ip6t_tcpmss");
MODULE_DESCRIPTION("Xtables: TCP Maximum Segment Size (MSS) adjustment");
MODULE_ALIAS("ipt_TCPMSS");
MODULE_ALIAS("ip6t_TCPMSS");

static bool
tcpmss_mt(const struct sk_buff *skb, struct xt_action_param *par)
static inline unsigned int
optlen(const u_int8_t *opt, unsigned int offset)
{
const struct xt_tcpmss_match_info *info = par->matchinfo;
const struct tcphdr *th;
struct tcphdr _tcph;
/* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */
const u_int8_t *op;
u8 _opt[15 * 4 - sizeof(_tcph)];
unsigned int i, optlen;

/* If we don't have the whole header, drop packet. */
th = skb_header_pointer(skb, par->thoff, sizeof(_tcph), &_tcph);
if (th == NULL)
goto dropit;

/* Malformed. */
if (th->doff*4 < sizeof(*th))
goto dropit;

optlen = th->doff*4 - sizeof(*th);
if (!optlen)
goto out;

/* Truncated options. */
op = skb_header_pointer(skb, par->thoff + sizeof(*th), optlen, _opt);
if (op == NULL)
goto dropit;

for (i = 0; i < optlen; ) {
if (op[i] == TCPOPT_MSS
&& (optlen - i) >= TCPOLEN_MSS
&& op[i+1] == TCPOLEN_MSS) {
u_int16_t mssval;

mssval = (op[i+2] << 8) | op[i+3];

return (mssval >= info->mss_min &&
mssval <= info->mss_max) ^ info->invert;
/* Beware zero-length options: make finite progress */
if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0)
return 1;
else
return opt[offset+1];
}

static int
tcpmss_mangle_packet(struct sk_buff *skb,
const struct xt_tcpmss_info *info,
unsigned int in_mtu,
unsigned int tcphoff,
unsigned int minlen)
{
struct tcphdr *tcph;
unsigned int tcplen, i;
__be16 oldval;
u16 newmss;
u8 *opt;

if (!skb_make_writable(skb, skb->len))
return -1;

tcplen = skb->len - tcphoff;
tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);

/* Header cannot be larger than the packet */
if (tcplen < tcph->doff*4)
return -1;

if (info->mss == XT_TCPMSS_CLAMP_PMTU) {
if (dst_mtu(skb_dst(skb)) <= minlen) {
if (net_ratelimit())
pr_err("unknown or invalid path-MTU (%u)\n",
dst_mtu(skb_dst(skb)));
return -1;
}
if (in_mtu <= minlen) {
if (net_ratelimit())
pr_err("unknown or invalid path-MTU (%u)\n",
in_mtu);
return -1;
}
newmss = min(dst_mtu(skb_dst(skb)), in_mtu) - minlen;
} else
newmss = info->mss;

opt = (u_int8_t *)tcph;
for (i = sizeof(struct tcphdr); i < tcph->doff*4; i += optlen(opt, i)) {
if (opt[i] == TCPOPT_MSS && tcph->doff*4 - i >= TCPOLEN_MSS &&
opt[i+1] == TCPOLEN_MSS) {
u_int16_t oldmss;

oldmss = (opt[i+2] << 8) | opt[i+3];

/* Never increase MSS, even when setting it, as
* doing so results in problems for hosts that rely
* on MSS being set correctly.
*/
if (oldmss <= newmss)
return 0;

opt[i+2] = (newmss & 0xff00) >> 8;
opt[i+3] = newmss & 0x00ff;

inet_proto_csum_replace2(&tcph->check, skb,
htons(oldmss), htons(newmss),
0);
return 0;
}
if (op[i] < 2)
i++;
else
i += op[i+1] ? : 1;
}
out:
return info->invert;

dropit:
par->hotdrop = true;
/* There is data after the header so the option can't be added
without moving it, and doing so may make the SYN packet
itself too large. Accept the packet unmodified instead. */
if (tcplen > tcph->doff*4)
return 0;

/*
* MSS Option not found ?! add it..
*/
if (skb_tailroom(skb) < TCPOLEN_MSS) {
if (pskb_expand_head(skb, 0,
TCPOLEN_MSS - skb_tailroom(skb),
GFP_ATOMIC))
return -1;
tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
}

skb_put(skb, TCPOLEN_MSS);

opt = (u_int8_t *)tcph + sizeof(struct tcphdr);
memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr));

inet_proto_csum_replace2(&tcph->check, skb,
htons(tcplen), htons(tcplen + TCPOLEN_MSS), 1);
opt[0] = TCPOPT_MSS;
opt[1] = TCPOLEN_MSS;
opt[2] = (newmss & 0xff00) >> 8;
opt[3] = newmss & 0x00ff;

inet_proto_csum_replace4(&tcph->check, skb, 0, *((__be32 *)opt), 0);

oldval = ((__be16 *)tcph)[6];
tcph->doff += TCPOLEN_MSS/4;
inet_proto_csum_replace2(&tcph->check, skb,
oldval, ((__be16 *)tcph)[6], 0);
return TCPOLEN_MSS;
}

static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb,
unsigned int family)
{
struct flowi fl;
const struct nf_afinfo *ai;
struct rtable *rt = NULL;
u_int32_t mtu = ~0U;

if (family == PF_INET) {
struct flowi4 *fl4 = &fl.u.ip4;
memset(fl4, 0, sizeof(*fl4));
fl4->daddr = ip_hdr(skb)->saddr;
} else {
struct flowi6 *fl6 = &fl.u.ip6;

memset(fl6, 0, sizeof(*fl6));
fl6->daddr = ipv6_hdr(skb)->saddr;
}
rcu_read_lock();
ai = nf_get_afinfo(family);
if (ai != NULL)
ai->route(&init_net, (struct dst_entry **)&rt, &fl, false);
rcu_read_unlock();

if (rt != NULL) {
mtu = dst_mtu(&rt->dst);
dst_release(&rt->dst);
}
return mtu;
}

/*
 * TCPMSS target for IPv4.
 *
 * Delegates to tcpmss_mangle_packet(), passing the reverse-path MTU, the
 * TCP header offset (ihl * 4) and the minimum IPv4 + TCP header length.
 * A negative result drops the packet. A positive result is added to the
 * IPv4 total length and the IP header checksum is updated to match.
 */
static unsigned int
tcpmss_tg4(struct sk_buff *skb, const struct xt_action_param *par)
{
	struct iphdr *ip4 = ip_hdr(skb);
	__be16 tot;
	int grown;

	grown = tcpmss_mangle_packet(skb, par->targinfo,
				     tcpmss_reverse_mtu(skb, PF_INET),
				     ip4->ihl * 4,
				     sizeof(*ip4) + sizeof(struct tcphdr));
	if (grown < 0)
		return NF_DROP;
	if (grown == 0)
		return XT_CONTINUE;

	/* Fetch the header pointer again before patching the length. */
	ip4 = ip_hdr(skb);
	tot = htons(ntohs(ip4->tot_len) + grown);
	csum_replace2(&ip4->check, ip4->tot_len, tot);
	ip4->tot_len = tot;

	return XT_CONTINUE;
}

#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
/*
 * TCPMSS target for IPv6.
 *
 * Locates the TCP header with ipv6_skip_exthdr() (NF_DROP if that fails),
 * then delegates to tcpmss_mangle_packet() with the reverse-path MTU and the
 * minimum IPv6 + TCP header length. A negative result drops the packet. A
 * positive result is added to the IPv6 payload length.
 */
static unsigned int
tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{
	struct ipv6hdr *ip6 = ipv6_hdr(skb);
	u8 proto = ip6->nexthdr;
	__be16 frag;
	int off;
	int grown;

	off = ipv6_skip_exthdr(skb, sizeof(*ip6), &proto, &frag);
	if (off < 0)
		return NF_DROP;

	grown = tcpmss_mangle_packet(skb, par->targinfo,
				     tcpmss_reverse_mtu(skb, PF_INET6),
				     off,
				     sizeof(*ip6) + sizeof(struct tcphdr));
	if (grown < 0)
		return NF_DROP;
	if (grown == 0)
		return XT_CONTINUE;

	/* Fetch the header pointer again before patching the length. */
	ip6 = ipv6_hdr(skb);
	ip6->payload_len = htons(ntohs(ip6->payload_len) + grown);

	return XT_CONTINUE;
}
#endif

/*
 * Must specify -p tcp --syn
 *
 * Returns true when @m is the "tcp" match, its flg_cmp has TCPHDR_SYN set
 * and its invflags do not have XT_TCP_INV_FLAGS set.
 */
static inline bool find_syn_match(const struct xt_entry_match *m)
{
	const struct xt_tcp *tcp;

	if (strcmp(m->u.kernel.match->name, "tcp") != 0)
		return false;

	tcp = (const struct xt_tcp *)m->data;
	if (!(tcp->flg_cmp & TCPHDR_SYN))
		return false;

	return !(tcp->invflags & XT_TCP_INV_FLAGS);
}

static struct xt_match tcpmss_mt_reg[] __read_mostly = {
static int tcpmss_tg4_check(const struct xt_tgchk_param *par)
{
const struct xt_tcpmss_info *info = par->targinfo;
const struct ipt_entry *e = par->entryinfo;
const struct xt_entry_match *ematch;

if (info->mss == XT_TCPMSS_CLAMP_PMTU &&
(par->hook_mask & ~((1 << NF_INET_FORWARD) |
(1 << NF_INET_LOCAL_OUT) |
(1 << NF_INET_POST_ROUTING))) != 0) {
pr_info("path-MTU clamping only supported in "
"FORWARD, OUTPUT and POSTROUTING hooks\n");
return -EINVAL;
}
xt_ematch_foreach(ematch, e)
if (find_syn_match(ematch))
return 0;
pr_info("Only works on TCP SYN packets\n");
return -EINVAL;
}

#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
static int tcpmss_tg6_check(const struct xt_tgchk_param *par)
{
const struct xt_tcpmss_info *info = par->targinfo;
const struct ip6t_entry *e = par->entryinfo;
const struct xt_entry_match *ematch;

if (info->mss == XT_TCPMSS_CLAMP_PMTU &&
(par->hook_mask & ~((1 << NF_INET_FORWARD) |
(1 << NF_INET_LOCAL_OUT) |
(1 << NF_INET_POST_ROUTING))) != 0) {
pr_info("path-MTU clamping only supported in "
"FORWARD, OUTPUT and POSTROUTING hooks\n");
return -EINVAL;
}
xt_ematch_foreach(ematch, e)
if (find_syn_match(ematch))
return 0;
pr_info("Only works on TCP SYN packets\n");
return -EINVAL;
}
#endif

static struct xt_target tcpmss_tg_reg[] __read_mostly = {
{
.name = "tcpmss",
.family = NFPROTO_IPV4,
.match = tcpmss_mt,
.matchsize = sizeof(struct xt_tcpmss_match_info),
.name = "TCPMSS",
.checkentry = tcpmss_tg4_check,
.target = tcpmss_tg4,
.targetsize = sizeof(struct xt_tcpmss_info),
.proto = IPPROTO_TCP,
.me = THIS_MODULE,
},
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
{
.name = "tcpmss",
.family = NFPROTO_IPV6,
.match = tcpmss_mt,
.matchsize = sizeof(struct xt_tcpmss_match_info),
.name = "TCPMSS",
.checkentry = tcpmss_tg6_check,
.target = tcpmss_tg6,
.targetsize = sizeof(struct xt_tcpmss_info),
.proto = IPPROTO_TCP,
.me = THIS_MODULE,
},
#endif
};

static int __init tcpmss_mt_init(void)
static int __init tcpmss_tg_init(void)
{
return xt_register_matches(tcpmss_mt_reg, ARRAY_SIZE(tcpmss_mt_reg));
return xt_register_targets(tcpmss_tg_reg, ARRAY_SIZE(tcpmss_tg_reg));
}

static void __exit tcpmss_mt_exit(void)
static void __exit tcpmss_tg_exit(void)
{
xt_unregister_matches(tcpmss_mt_reg, ARRAY_SIZE(tcpmss_mt_reg));
xt_unregister_targets(tcpmss_tg_reg, ARRAY_SIZE(tcpmss_tg_reg));
}

module_init(tcpmss_mt_init);
module_exit(tcpmss_mt_exit);
module_init(tcpmss_tg_init);
module_exit(tcpmss_tg_exit);