/* BGP routing information
* Copyright (C) 1996, 97, 98, 99 Kunihiro Ishiguro
* Copyright (C) 2016 Job Snijders <job@instituut.net>
*
* This file is part of GNU Zebra.
*
* GNU Zebra is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2, or (at your option) any
* later version.
*
* GNU Zebra is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; see the file COPYING; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <zebra.h>
#include <math.h>
#include "prefix.h"
#include "linklist.h"
#include "memory.h"
#include "command.h"
#include "stream.h"
#include "filter.h"
#include "log.h"
#include "routemap.h"
#include "buffer.h"
#include "sockunion.h"
#include "plist.h"
#include "thread.h"
#include "workqueue.h"
#include "queue.h"
#include "memory.h"
#include "lib/json.h"
#include "lib_errors.h"
#include "bgpd/bgpd.h"
#include "bgpd/bgp_table.h"
#include "bgpd/bgp_route.h"
#include "bgpd/bgp_attr.h"
#include "bgpd/bgp_debug.h"
#include "bgpd/bgp_errors.h"
#include "bgpd/bgp_aspath.h"
#include "bgpd/bgp_regex.h"
#include "bgpd/bgp_community.h"
#include "bgpd/bgp_ecommunity.h"
#include "bgpd/bgp_lcommunity.h"
#include "bgpd/bgp_clist.h"
#include "bgpd/bgp_packet.h"
#include "bgpd/bgp_filter.h"
#include "bgpd/bgp_fsm.h"
#include "bgpd/bgp_mplsvpn.h"
#include "bgpd/bgp_nexthop.h"
#include "bgpd/bgp_damp.h"
#include "bgpd/bgp_advertise.h"
#include "bgpd/bgp_zebra.h"
#include "bgpd/bgp_vty.h"
#include "bgpd/bgp_mpath.h"
#include "bgpd/bgp_nht.h"
#include "bgpd/bgp_updgrp.h"
#include "bgpd/bgp_label.h"
#include "bgpd/bgp_addpath.h"
#include "bgpd/bgp_mac.h"
#if ENABLE_BGP_VNC
#include "bgpd/rfapi/rfapi_backend.h"
#include "bgpd/rfapi/vnc_import_bgp.h"
#include "bgpd/rfapi/vnc_export_bgp.h"
#endif
#include "bgpd/bgp_encap_types.h"
#include "bgpd/bgp_encap_tlv.h"
#include "bgpd/bgp_evpn.h"
#include "bgpd/bgp_evpn_vty.h"
#include "bgpd/bgp_flowspec.h"
#include "bgpd/bgp_flowspec_util.h"
#include "bgpd/bgp_pbr.h"
#ifndef VTYSH_EXTRACT_PL
#include "bgpd/bgp_route_clippy.c"
#endif
/* Extern from bgp_dump.c */
extern const char *bgp_origin_str[];
extern const char *bgp_origin_long_str[];
/* PMSI strings. */
#define PMSI_TNLTYPE_STR_NO_INFO "No info"
#define PMSI_TNLTYPE_STR_DEFAULT PMSI_TNLTYPE_STR_NO_INFO
static const struct message bgp_pmsi_tnltype_str[] = {
{PMSI_TNLTYPE_NO_INFO, PMSI_TNLTYPE_STR_NO_INFO},
{PMSI_TNLTYPE_RSVP_TE_P2MP, "RSVP-TE P2MP"},
{PMSI_TNLTYPE_MLDP_P2MP, "mLDP P2MP"},
{PMSI_TNLTYPE_PIM_SSM, "PIM-SSM"},
{PMSI_TNLTYPE_PIM_SM, "PIM-SM"},
{PMSI_TNLTYPE_PIM_BIDIR, "PIM-BIDIR"},
{PMSI_TNLTYPE_INGR_REPL, "Ingress Replication"},
{PMSI_TNLTYPE_MLDP_MP2MP, "mLDP MP2MP"},
{0}
};
#define VRFID_NONE_STR "-"
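/* Get (creating if necessary) the route node for the given prefix. For
 * SAFI_MPLS_VPN, SAFI_ENCAP and SAFI_EVPN the table is two-level: the
 * outer table is keyed by the Route Distinguisher (prd) and each RD node
 * holds an inner per-prefix table. As with bgp_node_get(), the returned
 * node holds a lock.
 */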
struct bgp_node *bgp_afi_node_get(struct bgp_table *table, afi_t afi,
safi_t safi, struct prefix *p,
struct prefix_rd *prd)
{
struct bgp_node *rn;
struct bgp_node *prn = NULL;
assert(table);
if (!table)
return NULL;
if ((safi == SAFI_MPLS_VPN) || (safi == SAFI_ENCAP)
|| (safi == SAFI_EVPN)) {
prn = bgp_node_get(table, (struct prefix *)prd);
if (!bgp_node_has_bgp_path_info_data(prn))
bgp_node_set_bgp_table_info(
prn, bgp_table_init(table->bgp, afi, safi));
else
bgp_unlock_node(prn);
table = bgp_node_get_bgp_table_info(prn);
}
rn = bgp_node_get(table, p);
if ((safi == SAFI_MPLS_VPN) || (safi == SAFI_ENCAP)
|| (safi == SAFI_EVPN))
rn->prn = prn;
return rn;
}
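/* Look up the route node for the given prefix without creating it. Uses
 * the same two-level (RD, then prefix) lookup as bgp_afi_node_get() for
 * the VPN/ENCAP/EVPN SAFIs. Returns NULL if no matching node exists.
 */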
struct bgp_node *bgp_afi_node_lookup(struct bgp_table *table, afi_t afi,
safi_t safi, struct prefix *p,
struct prefix_rd *prd)
{
struct bgp_node *rn;
struct bgp_node *prn = NULL;
if (!table)
return NULL;
if ((safi == SAFI_MPLS_VPN) || (safi == SAFI_ENCAP)
|| (safi == SAFI_EVPN)) {
prn = bgp_node_lookup(table, (struct prefix *)prd);
if (!prn)
return NULL;
if (!bgp_node_has_bgp_path_info_data(prn)) {
bgp_unlock_node(prn);
return NULL;
}
table = bgp_node_get_bgp_table_info(prn);
}
rn = bgp_node_lookup(table, p);
return rn;
}
/* Allocate bgp_path_info_extra */
static struct bgp_path_info_extra *bgp_path_info_extra_new(void)
{
struct bgp_path_info_extra *new;
new = XCALLOC(MTYPE_BGP_ROUTE_EXTRA,
sizeof(struct bgp_path_info_extra));
new->label[0] = MPLS_INVALID_LABEL;
new->num_labels = 0;
new->bgp_fs_pbr = list_new();
new->bgp_fs_iprule = list_new();
return new;
}
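/* Free a bgp_path_info_extra and everything hanging off it (dampening
 * info, parent path reference, originating bgp instance, flowspec
 * lists), then NULL out the caller's pointer.
 */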
void bgp_path_info_extra_free(struct bgp_path_info_extra **extra)
{
struct bgp_path_info_extra *e;
if (!extra || !*extra)
return;
e = *extra;
if (e->damp_info)
bgp_damp_info_free(e->damp_info, 0);
e->damp_info = NULL;
if (e->parent) {
struct bgp_path_info *bpi = (struct bgp_path_info *)e->parent;
if (bpi->net) {
/* FIXME: since multiple e may have the same e->parent
* and e->parent->net is holding a refcount for each
* of them, we need to do some fudging here.
*
* WARNING: if bpi->net->lock drops to 0, bpi may be
* freed as well (because bpi->net was holding the
* last reference to bpi) => write after free!
*/
unsigned refcount;
bpi = bgp_path_info_lock(bpi);
refcount = bpi->net->lock - 1;
bgp_unlock_node((struct bgp_node *)bpi->net);
if (!refcount)
bpi->net = NULL;
bgp_path_info_unlock(bpi);
}
bgp_path_info_unlock(e->parent);
e->parent = NULL;
}
if (e->bgp_orig)
bgp_unlock(e->bgp_orig);
if ((*extra)->bgp_fs_iprule)
list_delete(&((*extra)->bgp_fs_iprule));
if ((*extra)->bgp_fs_pbr)
list_delete(&((*extra)->bgp_fs_pbr));
XFREE(MTYPE_BGP_ROUTE_EXTRA, *extra);
*extra = NULL;
}
/* Get the extra information for the given bgp_path_info, lazily
 * allocated if required.
 */
struct bgp_path_info_extra *bgp_path_info_extra_get(struct bgp_path_info *pi)
{
if (!pi->extra)
pi->extra = bgp_path_info_extra_new();
return pi->extra;
}
/* Allocate a new bgp_path_info structure. */
struct bgp_path_info *bgp_path_info_new(void)
{
return XCALLOC(MTYPE_BGP_ROUTE, sizeof(struct bgp_path_info));
}
/* Free bgp route information. */
static void bgp_path_info_free(struct bgp_path_info *path)
{
if (path->attr)
bgp_attr_unintern(&path->attr);
bgp_unlink_nexthop(path);
bgp_path_info_extra_free(&path->extra);
bgp_path_info_mpath_free(&path->mpath);
if (path->net)
bgp_addpath_free_info_data(&path->tx_addpath,
&path->net->tx_addpath);
peer_unlock(path->peer); /* bgp_path_info peer reference */
XFREE(MTYPE_BGP_ROUTE, path);
}
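/* Reference counting for bgp_path_info: lock takes a reference, unlock
 * drops one and frees the structure via bgp_path_info_free() when the
 * count reaches zero.
 */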
struct bgp_path_info *bgp_path_info_lock(struct bgp_path_info *path)
{
path->lock++;
return path;
}
struct bgp_path_info *bgp_path_info_unlock(struct bgp_path_info *path)
{
assert(path && path->lock > 0);
path->lock--;
if (path->lock == 0) {
#if 0
zlog_debug ("%s: unlocked and freeing", __func__);
zlog_backtrace (LOG_DEBUG);
#endif
bgp_path_info_free(path);
return NULL;
}
#if 0
if (path->lock == 1)
{
zlog_debug ("%s: unlocked to 1", __func__);
zlog_backtrace (LOG_DEBUG);
}
#endif
return path;
}
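/* Prepend a path to the node's list of paths, taking a reference on the
 * path, the node and the peer. bgp_path_info_reap() below is the
 * matching teardown.
 */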
void bgp_path_info_add(struct bgp_node *rn, struct bgp_path_info *pi)
{
struct bgp_path_info *top;
top = bgp_node_get_bgp_path_info(rn);
pi->next = top;
pi->prev = NULL;
if (top)
top->prev = pi;
bgp_node_set_bgp_path_info(rn, pi);
bgp_path_info_lock(pi);
bgp_lock_node(rn);
peer_lock(pi->peer); /* bgp_path_info peer reference */
}
/* Do the actual removal of info from RIB, for use by bgp_process
completion callback *only* */
void bgp_path_info_reap(struct bgp_node *rn, struct bgp_path_info *pi)
{
if (pi->next)
pi->next->prev = pi->prev;
if (pi->prev)
pi->prev->next = pi->next;
else
bgp_node_set_bgp_path_info(rn, pi->next);
bgp_path_info_mpath_dequeue(pi);
bgp_path_info_unlock(pi);
bgp_unlock_node(rn);
}
void bgp_path_info_delete(struct bgp_node *rn, struct bgp_path_info *pi)
{
bgp_path_info_set_flag(rn, pi, BGP_PATH_REMOVED);
/* the set_flag call above already took care of pcount */
UNSET_FLAG(pi->flags, BGP_PATH_VALID);
}
/* undo the effects of a previous call to bgp_path_info_delete; typically
called when a route is deleted and then quickly re-added before the
deletion has been processed */
void bgp_path_info_restore(struct bgp_node *rn, struct bgp_path_info *pi)
{
bgp_path_info_unset_flag(rn, pi, BGP_PATH_REMOVED);
/* the unset_flag call above already took care of pcount */
SET_FLAG(pi->flags, BGP_PATH_VALID);
}
/* Adjust pcount as required */
static void bgp_pcount_adjust(struct bgp_node *rn, struct bgp_path_info *pi)
{
struct bgp_table *table;
assert(rn && bgp_node_table(rn));
assert(pi && pi->peer && pi->peer->bgp);
table = bgp_node_table(rn);
if (pi->peer == pi->peer->bgp->peer_self)
return;
if (!BGP_PATH_COUNTABLE(pi)
&& CHECK_FLAG(pi->flags, BGP_PATH_COUNTED)) {
UNSET_FLAG(pi->flags, BGP_PATH_COUNTED);
/* slight hack, but more robust against errors. */
if (pi->peer->pcount[table->afi][table->safi])
pi->peer->pcount[table->afi][table->safi]--;
else
flog_err(EC_LIB_DEVELOPMENT,
"Asked to decrement 0 prefix count for peer");
} else if (BGP_PATH_COUNTABLE(pi)
&& !CHECK_FLAG(pi->flags, BGP_PATH_COUNTED)) {
SET_FLAG(pi->flags, BGP_PATH_COUNTED);
pi->peer->pcount[table->afi][table->safi]++;
}
}
static int bgp_label_index_differs(struct bgp_path_info *pi1,
struct bgp_path_info *pi2)
{
return (!(pi1->attr->label_index == pi2->attr->label_index));
}
/* Set/unset bgp_path_info flags, adjusting any other state as needed.
* This is here primarily to keep prefix-count in check.
*/
void bgp_path_info_set_flag(struct bgp_node *rn, struct bgp_path_info *pi,
uint32_t flag)
{
SET_FLAG(pi->flags, flag);
/* early bath if we know it's not a flag that changes countability state
*/
if (!CHECK_FLAG(flag,
BGP_PATH_VALID | BGP_PATH_HISTORY | BGP_PATH_REMOVED))
return;
bgp_pcount_adjust(rn, pi);
}
void bgp_path_info_unset_flag(struct bgp_node *rn, struct bgp_path_info *pi,
uint32_t flag)
{
UNSET_FLAG(pi->flags, flag);
/* early bath if we know it's not a flag that changes countability state
*/
if (!CHECK_FLAG(flag,
BGP_PATH_VALID | BGP_PATH_HISTORY | BGP_PATH_REMOVED))
return;
bgp_pcount_adjust(rn, pi);
}
/* Get MED value. If MED value is missing and "bgp bestpath
missing-as-worst" is specified, treat it as the worst value. */
static uint32_t bgp_med_value(struct attr *attr, struct bgp *bgp)
{
if (attr->flag & ATTR_FLAG_BIT(BGP_ATTR_MULTI_EXIT_DISC))
return attr->med;
else {
if (bgp_flag_check(bgp, BGP_FLAG_MED_MISSING_AS_WORST))
return BGP_MED_MAX;
else
return 0;
}
}
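/* Render "path <peer> (addpath rxid <id>)" into buf for debug output.
 * buf is assumed to be at least PATH_ADDPATH_STR_BUFFER bytes.
 */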
void bgp_path_info_path_with_addpath_rx_str(struct bgp_path_info *pi, char *buf)
{
if (pi->addpath_rx_id)
sprintf(buf, "path %s (addpath rxid %d)", pi->peer->host,
pi->addpath_rx_id);
else
sprintf(buf, "path %s", pi->peer->host);
}
/* Compare two BGP route entities. If 'new' is preferable over 'exist',
 * return 1.
 */
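/* The comparison follows the standard BGP decision process, implemented
 * step by step below: EVPN sticky-MAC / MAC-mobility sequence checks
 * first, then weight, local preference, locally-originated routes,
 * AS-path length, origin, MED, peer type (eBGP over iBGP), IGP metric,
 * cluster-list length as an IGP tie-break, confed-external over
 * confed-internal, multipath equality, oldest external, router-id,
 * cluster-list length, and finally neighbor address.
 */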
static int bgp_path_info_cmp(struct bgp *bgp, struct bgp_path_info *new,
struct bgp_path_info *exist, int *paths_eq,
struct bgp_maxpaths_cfg *mpath_cfg, int debug,
char *pfx_buf, afi_t afi, safi_t safi)
{
struct attr *newattr, *existattr;
bgp_peer_sort_t new_sort;
bgp_peer_sort_t exist_sort;
uint32_t new_pref;
uint32_t exist_pref;
uint32_t new_med;
uint32_t exist_med;
uint32_t new_weight;
uint32_t exist_weight;
uint32_t newm, existm;
struct in_addr new_id;
struct in_addr exist_id;
int new_cluster;
int exist_cluster;
int internal_as_route;
int confed_as_route;
int ret = 0;
char new_buf[PATH_ADDPATH_STR_BUFFER];
char exist_buf[PATH_ADDPATH_STR_BUFFER];
uint32_t new_mm_seq;
uint32_t exist_mm_seq;
int nh_cmp;
*paths_eq = 0;
/* 0. Null check. */
if (new == NULL) {
if (debug)
zlog_debug("%s: new is NULL", pfx_buf);
return 0;
}
if (debug)
bgp_path_info_path_with_addpath_rx_str(new, new_buf);
if (exist == NULL) {
if (debug)
zlog_debug("%s: %s is the initial bestpath", pfx_buf,
new_buf);
return 1;
}
if (debug) {
bgp_path_info_path_with_addpath_rx_str(exist, exist_buf);
zlog_debug("%s: Comparing %s flags 0x%x with %s flags 0x%x",
pfx_buf, new_buf, new->flags, exist_buf,
exist->flags);
}
newattr = new->attr;
existattr = exist->attr;
/* For EVPN routes, we cannot just go by local vs remote, we have to
* look at the MAC mobility sequence number, if present.
*/
if (safi == SAFI_EVPN) {
/* This is an error condition described in RFC 7432 Section 15.2.
 * The RFC states that in this scenario "the PE MUST alert the
 * operator" but it does not state what other action to take. In
 * order to provide some consistency in this scenario we are going
 * to prefer the path with the sticky flag.
 */
if (newattr->sticky != existattr->sticky) {
if (!debug) {
prefix2str(&new->net->p, pfx_buf,
sizeof(*pfx_buf)
* PREFIX2STR_BUFFER);
bgp_path_info_path_with_addpath_rx_str(new,
new_buf);
bgp_path_info_path_with_addpath_rx_str(
exist, exist_buf);
}
if (newattr->sticky && !existattr->sticky) {
if (debug)
zlog_debug(
"%s: %s wins over %s due to sticky MAC flag",
pfx_buf, new_buf, exist_buf);
return 1;
}
if (!newattr->sticky && existattr->sticky) {
if (debug)
zlog_debug(
"%s: %s loses to %s due to sticky MAC flag",
pfx_buf, new_buf, exist_buf);
return 0;
}
}
new_mm_seq = mac_mobility_seqnum(newattr);
exist_mm_seq = mac_mobility_seqnum(existattr);
if (new_mm_seq > exist_mm_seq) {
if (debug)
zlog_debug(
"%s: %s wins over %s due to MM seq %u > %u",
pfx_buf, new_buf, exist_buf, new_mm_seq,
exist_mm_seq);
return 1;
}
if (new_mm_seq < exist_mm_seq) {
if (debug)
zlog_debug(
"%s: %s loses to %s due to MM seq %u < %u",
pfx_buf, new_buf, exist_buf, new_mm_seq,
exist_mm_seq);
return 0;
}
/*
 * if the sequence numbers are the same, the path with the
 * lowest nexthop IP wins
 */
nh_cmp = bgp_path_info_nexthop_cmp(new, exist);
if (nh_cmp < 0) {
if (debug)
zlog_debug(
"%s: %s wins over %s due to same MM seq %u and lower IP %s",
pfx_buf, new_buf, exist_buf, new_mm_seq,
inet_ntoa(new->attr->nexthop));
return 1;
}
if (nh_cmp > 0) {
if (debug)
zlog_debug(
"%s: %s loses to %s due to same MM seq %u and higher IP %s",
pfx_buf, new_buf, exist_buf, new_mm_seq,
inet_ntoa(new->attr->nexthop));
return 0;
}
}
/* 1. Weight check. */
new_weight = newattr->weight;
exist_weight = existattr->weight;
if (new_weight > exist_weight) {
if (debug)
zlog_debug("%s: %s wins over %s due to weight %d > %d",
pfx_buf, new_buf, exist_buf, new_weight,
exist_weight);
return 1;
}
if (new_weight < exist_weight) {
if (debug)
zlog_debug("%s: %s loses to %s due to weight %d < %d",
pfx_buf, new_buf, exist_buf, new_weight,
exist_weight);
return 0;
}
/* 2. Local preference check. */
new_pref = exist_pref = bgp->default_local_pref;
if (newattr->flag & ATTR_FLAG_BIT(BGP_ATTR_LOCAL_PREF))
new_pref = newattr->local_pref;
if (existattr->flag & ATTR_FLAG_BIT(BGP_ATTR_LOCAL_PREF))
exist_pref = existattr->local_pref;
if (new_pref > exist_pref) {
if (debug)
zlog_debug(
"%s: %s wins over %s due to localpref %d > %d",
pfx_buf, new_buf, exist_buf, new_pref,
exist_pref);
return 1;
}
if (new_pref < exist_pref) {
if (debug)
zlog_debug(
"%s: %s loses to %s due to localpref %d < %d",
pfx_buf, new_buf, exist_buf, new_pref,
exist_pref);
return 0;
}
/* 3. Local route check. We prefer:
* - BGP_ROUTE_STATIC
* - BGP_ROUTE_AGGREGATE
* - BGP_ROUTE_REDISTRIBUTE
*/
if (!(new->sub_type == BGP_ROUTE_NORMAL ||
new->sub_type == BGP_ROUTE_IMPORTED)) {
if (debug)
zlog_debug(
"%s: %s wins over %s due to preferred BGP_ROUTE type",
pfx_buf, new_buf, exist_buf);
return 1;
}
if (!(exist->sub_type == BGP_ROUTE_NORMAL ||
exist->sub_type == BGP_ROUTE_IMPORTED)) {
if (debug)
zlog_debug(
"%s: %s loses to %s due to preferred BGP_ROUTE type",
pfx_buf, new_buf, exist_buf);
return 0;
}
/* 4. AS path length check. */
if (!bgp_flag_check(bgp, BGP_FLAG_ASPATH_IGNORE)) {
int exist_hops = aspath_count_hops(existattr->aspath);
int exist_confeds = aspath_count_confeds(existattr->aspath);
if (bgp_flag_check(bgp, BGP_FLAG_ASPATH_CONFED)) {
int aspath_hops;
aspath_hops = aspath_count_hops(newattr->aspath);
aspath_hops += aspath_count_confeds(newattr->aspath);
if (aspath_hops < (exist_hops + exist_confeds)) {
if (debug)
zlog_debug(
"%s: %s wins over %s due to aspath (with confeds) hopcount %d < %d",
pfx_buf, new_buf, exist_buf,
aspath_hops,
(exist_hops + exist_confeds));
return 1;
}
if (aspath_hops > (exist_hops + exist_confeds)) {
if (debug)
zlog_debug(
"%s: %s loses to %s due to aspath (with confeds) hopcount %d > %d",
pfx_buf, new_buf, exist_buf,
aspath_hops,
(exist_hops + exist_confeds));
return 0;
}
} else {
int newhops = aspath_count_hops(newattr->aspath);
if (newhops < exist_hops) {
if (debug)
zlog_debug(
"%s: %s wins over %s due to aspath hopcount %d < %d",
pfx_buf, new_buf, exist_buf,
newhops, exist_hops);
return 1;
}
if (newhops > exist_hops) {
if (debug)
zlog_debug(
"%s: %s loses to %s due to aspath hopcount %d > %d",
pfx_buf, new_buf, exist_buf,
newhops, exist_hops);
return 0;
}
}
}
/* 5. Origin check. */
if (newattr->origin < existattr->origin) {
if (debug)
zlog_debug("%s: %s wins over %s due to ORIGIN %s < %s",
pfx_buf, new_buf, exist_buf,
bgp_origin_long_str[newattr->origin],
bgp_origin_long_str[existattr->origin]);
return 1;
}
if (newattr->origin > existattr->origin) {
if (debug)
zlog_debug("%s: %s loses to %s due to ORIGIN %s > %s",
pfx_buf, new_buf, exist_buf,
bgp_origin_long_str[newattr->origin],
bgp_origin_long_str[existattr->origin]);
return 0;
}
/* 6. MED check. */
internal_as_route = (aspath_count_hops(newattr->aspath) == 0
&& aspath_count_hops(existattr->aspath) == 0);
confed_as_route = (aspath_count_confeds(newattr->aspath) > 0
&& aspath_count_confeds(existattr->aspath) > 0
&& aspath_count_hops(newattr->aspath) == 0
&& aspath_count_hops(existattr->aspath) == 0);
if (bgp_flag_check(bgp, BGP_FLAG_ALWAYS_COMPARE_MED)
|| (bgp_flag_check(bgp, BGP_FLAG_MED_CONFED) && confed_as_route)
|| aspath_cmp_left(newattr->aspath, existattr->aspath)
|| aspath_cmp_left_confed(newattr->aspath, existattr->aspath)
|| internal_as_route) {
new_med = bgp_med_value(new->attr, bgp);
exist_med = bgp_med_value(exist->attr, bgp);
if (new_med < exist_med) {
if (debug)
zlog_debug(
"%s: %s wins over %s due to MED %d < %d",
pfx_buf, new_buf, exist_buf, new_med,
exist_med);
return 1;
}
if (new_med > exist_med) {
if (debug)
zlog_debug(
"%s: %s loses to %s due to MED %d > %d",
pfx_buf, new_buf, exist_buf, new_med,
exist_med);
return 0;
}
}
/* 7. Peer type check. */
new_sort = new->peer->sort;
exist_sort = exist->peer->sort;
if (new_sort == BGP_PEER_EBGP
&& (exist_sort == BGP_PEER_IBGP || exist_sort == BGP_PEER_CONFED)) {
if (debug)
zlog_debug(
"%s: %s wins over %s due to eBGP peer > iBGP peer",
pfx_buf, new_buf, exist_buf);
return 1;
}
if (exist_sort == BGP_PEER_EBGP
&& (new_sort == BGP_PEER_IBGP || new_sort == BGP_PEER_CONFED)) {
if (debug)
zlog_debug(
"%s: %s loses to %s due to iBGP peer < eBGP peer",
pfx_buf, new_buf, exist_buf);
return 0;
}
/* 8. IGP metric check. */
newm = existm = 0;
if (new->extra)
newm = new->extra->igpmetric;
if (exist->extra)
existm = exist->extra->igpmetric;
if (newm < existm) {
if (debug)
zlog_debug(
"%s: %s wins over %s due to IGP metric %d < %d",
pfx_buf, new_buf, exist_buf, newm, existm);
ret = 1;
}
if (newm > existm) {
if (debug)
zlog_debug(
"%s: %s loses to %s due to IGP metric %d > %d",
pfx_buf, new_buf, exist_buf, newm, existm);
ret = 0;
}
/* 9. Same IGP metric. Compare the cluster list length as
representative of IGP hops metric. Rewrite the metric value
pair (newm, existm) with the cluster list length. Prefer the
path with smaller cluster list length. */
if (newm == existm) {
if (peer_sort(new->peer) == BGP_PEER_IBGP
&& peer_sort(exist->peer) == BGP_PEER_IBGP
&& (mpath_cfg == NULL
|| CHECK_FLAG(
mpath_cfg->ibgp_flags,
BGP_FLAG_IBGP_MULTIPATH_SAME_CLUSTERLEN))) {
newm = BGP_CLUSTER_LIST_LENGTH(new->attr);
existm = BGP_CLUSTER_LIST_LENGTH(exist->attr);
if (newm < existm) {
if (debug)
zlog_debug(
"%s: %s wins over %s due to CLUSTER_LIST length %d < %d",
pfx_buf, new_buf, exist_buf,
newm, existm);
ret = 1;
}
if (newm > existm) {
if (debug)
zlog_debug(
"%s: %s loses to %s due to CLUSTER_LIST length %d > %d",
pfx_buf, new_buf, exist_buf,
newm, existm);
ret = 0;
}
}
}
/* 10. confed-external vs. confed-internal */
if (CHECK_FLAG(bgp->config, BGP_CONFIG_CONFEDERATION)) {
if (new_sort == BGP_PEER_CONFED
&& exist_sort == BGP_PEER_IBGP) {
if (debug)
zlog_debug(
"%s: %s wins over %s due to confed-external peer > confed-internal peer",
pfx_buf, new_buf, exist_buf);
return 1;
}
if (exist_sort == BGP_PEER_CONFED
&& new_sort == BGP_PEER_IBGP) {
if (debug)
zlog_debug(
"%s: %s loses to %s due to confed-internal peer < confed-external peer",
pfx_buf, new_buf, exist_buf);
return 0;
}
}
/* 11. Maximum path check. */
if (newm == existm) {
/* If one path has a label but the other does not, do not treat
* them as equals for multipath
*/
if ((new->extra && bgp_is_valid_label(&new->extra->label[0]))
!= (exist->extra
&& bgp_is_valid_label(&exist->extra->label[0]))) {
if (debug)
zlog_debug(
"%s: %s and %s cannot be multipath, one has a label while the other does not",
pfx_buf, new_buf, exist_buf);
} else if (bgp_flag_check(bgp,
BGP_FLAG_ASPATH_MULTIPATH_RELAX)) {
/*
 * For the two paths, all comparison steps till IGP metric have
 * succeeded - including AS_PATH hop count. Since the 'bgp
 * bestpath as-path multipath-relax' knob is on, we don't need
 * an exact match of AS_PATH. Thus, mark the paths as equal.
 * That will trigger both these paths to get into the multipath
 * array.
 */
*paths_eq = 1;
if (debug)
zlog_debug(
"%s: %s and %s are equal via multipath-relax",
pfx_buf, new_buf, exist_buf);
} else if (new->peer->sort == BGP_PEER_IBGP) {
if (aspath_cmp(new->attr->aspath,
exist->attr->aspath)) {
*paths_eq = 1;
if (debug)
zlog_debug(
"%s: %s and %s are equal via matching aspaths",
pfx_buf, new_buf, exist_buf);
}
} else if (new->peer->as == exist->peer->as) {
*paths_eq = 1;
if (debug)
zlog_debug(
"%s: %s and %s are equal via same remote-as",
pfx_buf, new_buf, exist_buf);
}
} else {
/*
* TODO: If unequal cost ibgp multipath is enabled we can
* mark the paths as equal here instead of returning
*/
if (debug) {
if (ret == 1)
zlog_debug(
"%s: %s wins over %s after IGP metric comparison",
pfx_buf, new_buf, exist_buf);
else
zlog_debug(
"%s: %s loses to %s after IGP metric comparison",
pfx_buf, new_buf, exist_buf);
}
return ret;
}
/* 12. If both paths are external, prefer the path that was received
first (the oldest one). This step minimizes route-flap, since a
newer path won't displace an older one, even if it was the
preferred route based on the additional decision criteria below. */
if (!bgp_flag_check(bgp, BGP_FLAG_COMPARE_ROUTER_ID)
&& new_sort == BGP_PEER_EBGP && exist_sort == BGP_PEER_EBGP) {
if (CHECK_FLAG(new->flags, BGP_PATH_SELECTED)) {
if (debug)
zlog_debug(
"%s: %s wins over %s due to oldest external",
pfx_buf, new_buf, exist_buf);
return 1;
}
if (CHECK_FLAG(exist->flags, BGP_PATH_SELECTED)) {
if (debug)
zlog_debug(
"%s: %s loses to %s due to oldest external",
pfx_buf, new_buf, exist_buf);
return 0;
}
}
/* 13. Router-ID comparison. */
/* If one of the paths is "stale", the corresponding peer router-id will
 * be 0 and would always win over the other path. If the originator id is
 * used for the comparison, it will decide which path is better.
 */
if (newattr->flag & ATTR_FLAG_BIT(BGP_ATTR_ORIGINATOR_ID))
new_id.s_addr = newattr->originator_id.s_addr;
else
new_id.s_addr = new->peer->remote_id.s_addr;
if (existattr->flag & ATTR_FLAG_BIT(BGP_ATTR_ORIGINATOR_ID))
exist_id.s_addr = existattr->originator_id.s_addr;
else
exist_id.s_addr = exist->peer->remote_id.s_addr;
if (ntohl(new_id.s_addr) < ntohl(exist_id.s_addr)) {
if (debug)
zlog_debug(
"%s: %s wins over %s due to Router-ID comparison",
pfx_buf, new_buf, exist_buf);
return 1;
}
if (ntohl(new_id.s_addr) > ntohl(exist_id.s_addr)) {
if (debug)
zlog_debug(
"%s: %s loses to %s due to Router-ID comparison",
pfx_buf, new_buf, exist_buf);
return 0;
}
/* 14. Cluster length comparison. */
new_cluster = BGP_CLUSTER_LIST_LENGTH(new->attr);
exist_cluster = BGP_CLUSTER_LIST_LENGTH(exist->attr);
if (new_cluster < exist_cluster) {
if (debug)
zlog_debug(
"%s: %s wins over %s due to CLUSTER_LIST length %d < %d",
pfx_buf, new_buf, exist_buf, new_cluster,
exist_cluster);
return 1;
}
if (new_cluster > exist_cluster) {
if (debug)
zlog_debug(
"%s: %s loses to %s due to CLUSTER_LIST length %d > %d",
pfx_buf, new_buf, exist_buf, new_cluster,
exist_cluster);
return 0;
}
/* 15. Neighbor address comparison. */
/* Do this only if neither path is "stale" as stale paths do not have
* valid peer information (as the connection may or may not be up).
*/
if (CHECK_FLAG(exist->flags, BGP_PATH_STALE)) {
if (debug)
zlog_debug(
"%s: %s wins over %s due to latter path being STALE",
pfx_buf, new_buf, exist_buf);
return 1;
}
if (CHECK_FLAG(new->flags, BGP_PATH_STALE)) {
if (debug)
zlog_debug(
"%s: %s loses to %s due to former path being STALE",
pfx_buf, new_buf, exist_buf);
return 0;
}
/* locally configured routes to advertise do not have su_remote */
if (new->peer->su_remote == NULL)
return 0;
if (exist->peer->su_remote == NULL)
return 1;
ret = sockunion_cmp(new->peer->su_remote, exist->peer->su_remote);
if (ret == 1) {
if (debug)
zlog_debug(
"%s: %s loses to %s due to Neighor IP comparison",
pfx_buf, new_buf, exist_buf);
return 0;
}
if (ret == -1) {
if (debug)
zlog_debug(
"%s: %s wins over %s due to Neighor IP comparison",
pfx_buf, new_buf, exist_buf);
return 1;
}
if (debug)
zlog_debug("%s: %s wins over %s due to nothing left to compare",
pfx_buf, new_buf, exist_buf);
return 1;
}
/* Compare two BGP route entities. Return -1 if new is preferred, 1 if
 * exist is preferred, or 0 if they are the same (usually will only
 * occur if multipath is enabled).
 * This version is compatible with */
int bgp_path_info_cmp_compatible(struct bgp *bgp, struct bgp_path_info *new,
struct bgp_path_info *exist, char *pfx_buf,
afi_t afi, safi_t safi)
{
int paths_eq;
int ret;
ret = bgp_path_info_cmp(bgp, new, exist, &paths_eq, NULL, 0, pfx_buf,
afi, safi);
if (paths_eq)
ret = 0;
else {
if (ret == 1)
ret = -1;
else
ret = 1;
}
return ret;
}
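/* Apply the peer's configured inbound filters (distribute-list,
 * prefix-list and as-path filter-list) to a received prefix/attribute.
 * Returns FILTER_DENY on the first deny match, FILTER_PERMIT otherwise.
 * FILTER_EXIST_WARN logs a debug message when a filter is configured by
 * name but does not actually exist; for example,
 * FILTER_EXIST_WARN(DISTRIBUTE, distribute, filter) expands (via token
 * pasting) to a check of DISTRIBUTE_IN(filter) and a log naming
 * DISTRIBUTE_IN_NAME(filter).
 */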
static enum filter_type bgp_input_filter(struct peer *peer, struct prefix *p,
struct attr *attr, afi_t afi,
safi_t safi)
{
struct bgp_filter *filter;
filter = &peer->filter[afi][safi];
#define FILTER_EXIST_WARN(F, f, filter) \
if (BGP_DEBUG(update, UPDATE_IN) && !(F##_IN(filter))) \
zlog_debug("%s: Could not find configured input %s-list %s!", \
peer->host, #f, F##_IN_NAME(filter));
if (DISTRIBUTE_IN_NAME(filter)) {
FILTER_EXIST_WARN(DISTRIBUTE, distribute, filter);
if (access_list_apply(DISTRIBUTE_IN(filter), p) == FILTER_DENY)
return FILTER_DENY;
}
if (PREFIX_LIST_IN_NAME(filter)) {
FILTER_EXIST_WARN(PREFIX_LIST, prefix, filter);
if (prefix_list_apply(PREFIX_LIST_IN(filter), p) == PREFIX_DENY)
return FILTER_DENY;
}
if (FILTER_LIST_IN_NAME(filter)) {
FILTER_EXIST_WARN(FILTER_LIST, as, filter);
if (as_list_apply(FILTER_LIST_IN(filter), attr->aspath)
== AS_FILTER_DENY)
return FILTER_DENY;
}
return FILTER_PERMIT;
#undef FILTER_EXIST_WARN
}
static enum filter_type bgp_output_filter(struct peer *peer, struct prefix *p,
struct attr *attr, afi_t afi,
safi_t safi)
{
struct bgp_filter *filter;
filter = &peer->filter[afi][safi];
#define FILTER_EXIST_WARN(F, f, filter) \
if (BGP_DEBUG(update, UPDATE_OUT) && !(F##_OUT(filter))) \
zlog_debug("%s: Could not find configured output %s-list %s!", \
peer->host, #f, F##_OUT_NAME(filter));
if (DISTRIBUTE_OUT_NAME(filter)) {
FILTER_EXIST_WARN(DISTRIBUTE, distribute, filter);
if (access_list_apply(DISTRIBUTE_OUT(filter), p) == FILTER_DENY)
return FILTER_DENY;
}
if (PREFIX_LIST_OUT_NAME(filter)) {
FILTER_EXIST_WARN(PREFIX_LIST, prefix, filter);
if (prefix_list_apply(PREFIX_LIST_OUT(filter), p)
== PREFIX_DENY)
return FILTER_DENY;
}
if (FILTER_LIST_OUT_NAME(filter)) {
FILTER_EXIST_WARN(FILTER_LIST, as, filter);
if (as_list_apply(FILTER_LIST_OUT(filter), attr->aspath)
== AS_FILTER_DENY)
return FILTER_DENY;
}
return FILTER_PERMIT;
#undef FILTER_EXIST_WARN
}
/* If the community attribute includes NO_ADVERTISE, or NO_EXPORT /
 * NO_EXPORT_SUBCONFED as applicable to the peer type, return 1. */
static int bgp_community_filter(struct peer *peer, struct attr *attr)
{
if (attr->community) {
/* NO_ADVERTISE check. */
if (community_include(attr->community, COMMUNITY_NO_ADVERTISE))
return 1;
/* NO_EXPORT check. */
if (peer->sort == BGP_PEER_EBGP
&& community_include(attr->community, COMMUNITY_NO_EXPORT))
return 1;
/* NO_EXPORT_SUBCONFED check. */
if (peer->sort == BGP_PEER_EBGP
|| peer->sort == BGP_PEER_CONFED)
if (community_include(attr->community,
COMMUNITY_NO_EXPORT_SUBCONFED))
return 1;
}
return 0;
}
/* Route reflection loop check. */
static int bgp_cluster_filter(struct peer *peer, struct attr *attr)
{
struct in_addr cluster_id;
if (attr->cluster) {
if (peer->bgp->config & BGP_CONFIG_CLUSTER_ID)
cluster_id = peer->bgp->cluster_id;
else
cluster_id = peer->bgp->router_id;
if (cluster_loop_check(attr->cluster, cluster_id))
return 1;
}
return 0;
}
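/* Apply the inbound route-map (either the peer's configured one or the
 * explicitly named rmap_name) and the RFC 8212 inbound policy check to
 * the attributes of a received route. Returns RMAP_PERMIT or RMAP_DENY;
 * a named route-map that cannot be found is an implicit deny.
 */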
static int bgp_input_modifier(struct peer *peer, struct prefix *p,
struct attr *attr, afi_t afi, safi_t safi,
const char *rmap_name)
{
struct bgp_filter *filter;
struct bgp_path_info rmap_path;
route_map_result_t ret;
struct route_map *rmap = NULL;
filter = &peer->filter[afi][safi];
/* Apply default weight value. */
if (peer->weight[afi][safi])
attr->weight = peer->weight[afi][safi];
if (rmap_name) {
rmap = route_map_lookup_by_name(rmap_name);
if (rmap == NULL)
return RMAP_DENY;
} else {
if (ROUTE_MAP_IN_NAME(filter)) {
rmap = ROUTE_MAP_IN(filter);
if (rmap == NULL)
return RMAP_DENY;
}
}
/* RFC 8212 to prevent route leaks.
* This specification intends to improve this situation by requiring the
* explicit configuration of both BGP Import and Export Policies for any
* External BGP (EBGP) session such as customers, peers, or
* confederation boundaries for all enabled address families. Through
* codification of the aforementioned requirement, operators will
* benefit from consistent behavior across different BGP
* implementations.
*/
if (peer->bgp->ebgp_requires_policy
== DEFAULT_EBGP_POLICY_ENABLED)
if (!bgp_inbound_policy_exists(peer, filter))
return RMAP_DENY;
/* Route map apply. */
if (rmap) {
memset(&rmap_path, 0, sizeof(struct bgp_path_info));
/* Duplicate current value to new structure for modification. */
rmap_path.peer = peer;
rmap_path.attr = attr;
SET_FLAG(peer->rmap_type, PEER_RMAP_TYPE_IN);
/* Apply BGP route map to the attribute. */
ret = route_map_apply(rmap, p, RMAP_BGP, &rmap_path);
peer->rmap_type = 0;
if (ret == RMAP_DENYMATCH)
return RMAP_DENY;
}
return RMAP_PERMIT;
}
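/* Apply a named outbound route-map to the attributes of a route about
 * to be advertised. A NULL rmap_name permits the route unchanged; a
 * name that does not resolve to a route-map is an implicit deny.
 */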
static int bgp_output_modifier(struct peer *peer, struct prefix *p,
struct attr *attr, afi_t afi, safi_t safi,
const char *rmap_name)
{
struct bgp_path_info rmap_path;
route_map_result_t ret;
struct route_map *rmap = NULL;
uint8_t rmap_type;
/*
* So if we get to this point and have no rmap_name
* we want to just show the output as it currently
* exists.
*/
if (!rmap_name)
return RMAP_PERMIT;
/* Apply default weight value. */
if (peer->weight[afi][safi])
attr->weight = peer->weight[afi][safi];
rmap = route_map_lookup_by_name(rmap_name);
/*
* If we have a route map name and we do not find
* the routemap that means we have an implicit
* deny.
*/
if (rmap == NULL)
return RMAP_DENY;
memset(&rmap_path, 0, sizeof(struct bgp_path_info));
/* Route map apply. */
/* Duplicate current value to new structure for modification. */
rmap_path.peer = peer;
rmap_path.attr = attr;
rmap_type = peer->rmap_type;
SET_FLAG(peer->rmap_type, PEER_RMAP_TYPE_OUT);
/* Apply BGP route map to the attribute. */
ret = route_map_apply(rmap, p, RMAP_BGP, &rmap_path);
peer->rmap_type = rmap_type;
if (ret == RMAP_DENYMATCH)
/*
* caller has multiple error paths with bgp_attr_flush()
*/
return RMAP_DENY;
return RMAP_PERMIT;
}
/* If this is an EBGP peer with remove-private-AS */
static void bgp_peer_remove_private_as(struct bgp *bgp, afi_t afi, safi_t safi,
struct peer *peer, struct attr *attr)
{
if (peer->sort == BGP_PEER_EBGP
&& (peer_af_flag_check(peer, afi, safi,
PEER_FLAG_REMOVE_PRIVATE_AS_ALL_REPLACE)
|| peer_af_flag_check(peer, afi, safi,
PEER_FLAG_REMOVE_PRIVATE_AS_REPLACE)
|| peer_af_flag_check(peer, afi, safi,
PEER_FLAG_REMOVE_PRIVATE_AS_ALL)
|| peer_af_flag_check(peer, afi, safi,
PEER_FLAG_REMOVE_PRIVATE_AS))) {
// Take action on the entire aspath
if (peer_af_flag_check(peer, afi, safi,
PEER_FLAG_REMOVE_PRIVATE_AS_ALL_REPLACE)
|| peer_af_flag_check(peer, afi, safi,
PEER_FLAG_REMOVE_PRIVATE_AS_ALL)) {
if (peer_af_flag_check(
peer, afi, safi,
PEER_FLAG_REMOVE_PRIVATE_AS_ALL_REPLACE))
attr->aspath = aspath_replace_private_asns(
attr->aspath, bgp->as);
// The entire aspath consists of private ASNs so create
// an empty aspath
else if (aspath_private_as_check(attr->aspath))
attr->aspath = aspath_empty_get();
// There are some public and some private ASNs, remove
// the private ASNs
else
attr->aspath = aspath_remove_private_asns(
attr->aspath);
}
// 'all' was not specified so the entire aspath must be private
// ASNs for us to do anything
else if (aspath_private_as_check(attr->aspath)) {
if (peer_af_flag_check(
peer, afi, safi,
PEER_FLAG_REMOVE_PRIVATE_AS_REPLACE))
attr->aspath = aspath_replace_private_asns(
attr->aspath, bgp->as);
else
attr->aspath = aspath_empty_get();
}
}
}
/* If this is an EBGP peer with as-override */
static void bgp_peer_as_override(struct bgp *bgp, afi_t afi, safi_t safi,
struct peer *peer, struct attr *attr)
{
if (peer->sort == BGP_PEER_EBGP
&& peer_af_flag_check(peer, afi, safi, PEER_FLAG_AS_OVERRIDE)) {
if (aspath_single_asn_check(attr->aspath, peer->as))
attr->aspath = aspath_replace_specific_asn(
attr->aspath, peer->as, bgp->as);
}
}
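/* Add the well-known GRACEFUL_SHUTDOWN community (RFC 8326) to the
 * attribute and lower local-preference to BGP_GSHUT_LOCAL_PREF so that
 * routes through a router being drained lose the bestpath comparison.
 */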
void bgp_attr_add_gshut_community(struct attr *attr)
{
struct community *old;
struct community *new;
struct community *merge;
struct community *gshut;
old = attr->community;
gshut = community_str2com("graceful-shutdown");
assert(gshut);
if (old) {
merge = community_merge(community_dup(old), gshut);
if (old->refcnt == 0)
community_free(&old);
new = community_uniq_sort(merge);
community_free(&merge);
} else {
new = community_dup(gshut);
}
community_free(&gshut);
attr->community = new;
attr->flag |= ATTR_FLAG_BIT(BGP_ATTR_COMMUNITIES);
/* When we add the graceful-shutdown community we must also
* lower the local-preference */
attr->flag |= ATTR_FLAG_BIT(BGP_ATTR_LOCAL_PREF);
attr->local_pref = BGP_GSHUT_LOCAL_PREF;
}
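/* Zero out the nexthop in the attribute for the given address family so
 * that update formation will later fill in a per-peer value.
 */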
static void subgroup_announce_reset_nhop(uint8_t family, struct attr *attr)
{
if (family == AF_INET) {
attr->nexthop.s_addr = 0;
attr->mp_nexthop_global_in.s_addr = 0;
}
if (family == AF_INET6)
memset(&attr->mp_nexthop_global, 0, IPV6_MAX_BYTELEN);
if (family == AF_EVPN)
memset(&attr->mp_nexthop_global_in, 0, BGP_ATTR_NHLEN_IPV4);
}
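/* Decide whether the given path may be advertised to the subgroup and,
 * if so, build the outgoing attribute set in 'attr' (filters, ORF,
 * reflection rules, nexthop handling, outbound route-map/unsuppress-map,
 * RFC 8212 outbound policy, graceful-shutdown). Returns 1 to announce,
 * 0 to filter/withdraw.
 */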
int subgroup_announce_check(struct bgp_node *rn, struct bgp_path_info *pi,
struct update_subgroup *subgrp, struct prefix *p,
struct attr *attr)
{
struct bgp_filter *filter;
struct peer *from;
struct peer *peer;
struct peer *onlypeer;
struct bgp *bgp;
struct attr *piattr;
char buf[PREFIX_STRLEN];
int ret;
int transparent;
int reflect;
afi_t afi;
safi_t safi;
int samepeer_safe = 0; /* for synthetic mplsvpns routes */
if (DISABLE_BGP_ANNOUNCE)
return 0;
afi = SUBGRP_AFI(subgrp);
safi = SUBGRP_SAFI(subgrp);
peer = SUBGRP_PEER(subgrp);
onlypeer = NULL;
if (CHECK_FLAG(peer->flags, PEER_FLAG_LONESOUL))
onlypeer = SUBGRP_PFIRST(subgrp)->peer;
from = pi->peer;
filter = &peer->filter[afi][safi];
bgp = SUBGRP_INST(subgrp);
piattr = bgp_path_info_mpath_count(pi) ? bgp_path_info_mpath_attr(pi)
: pi->attr;
#if ENABLE_BGP_VNC
if (((afi == AFI_IP) || (afi == AFI_IP6)) && (safi == SAFI_MPLS_VPN)
&& ((pi->type == ZEBRA_ROUTE_BGP_DIRECT)
|| (pi->type == ZEBRA_ROUTE_BGP_DIRECT_EXT))) {
/*
* direct and direct_ext type routes originate internally even
* though they can have peer pointers that reference other
* systems
*/
prefix2str(p, buf, PREFIX_STRLEN);
zlog_debug("%s: pfx %s bgp_direct->vpn route peer safe",
__func__, buf);
samepeer_safe = 1;
}
#endif
if (((afi == AFI_IP) || (afi == AFI_IP6))
&& ((safi == SAFI_MPLS_VPN) || (safi == SAFI_UNICAST))
&& (pi->type == ZEBRA_ROUTE_BGP)
&& (pi->sub_type == BGP_ROUTE_IMPORTED)) {
/* Applies to routes leaked vpn->vrf and vrf->vpn */
samepeer_safe = 1;
}
/* With addpath we may be asked to TX all kinds of paths so make sure
* pi is valid */
if (!CHECK_FLAG(pi->flags, BGP_PATH_VALID)
|| CHECK_FLAG(pi->flags, BGP_PATH_HISTORY)
|| CHECK_FLAG(pi->flags, BGP_PATH_REMOVED)) {
return 0;
}
/* If this is not the bestpath then check to see if there is an
 * enabled addpath feature that requires us to advertise it */
if (!CHECK_FLAG(pi->flags, BGP_PATH_SELECTED)) {
if (!bgp_addpath_tx_path(peer->addpath_type[afi][safi], pi)) {
return 0;
}
}
/* Aggregate-address suppress check. */
if (pi->extra && pi->extra->suppress)
if (!UNSUPPRESS_MAP_NAME(filter)) {
return 0;
}
/*
 * If we are doing VRF-to-VRF leaking via the import
 * statement, we want to prevent the route from going
 * off box, as the RT and RD created are locally
 * significant and globally useless.
 */
if (safi == SAFI_MPLS_VPN && pi->extra && pi->extra->num_labels
&& pi->extra->label[0] == BGP_PREVENT_VRF_2_VRF_LEAK)
return 0;
/* If it's labeled safi, make sure the route has a valid label. */
if (safi == SAFI_LABELED_UNICAST) {
mpls_label_t label = bgp_adv_label(rn, pi, peer, afi, safi);
if (!bgp_is_valid_label(&label)) {
if (bgp_debug_update(NULL, p, subgrp->update_group, 0))
zlog_debug("u%" PRIu64 ":s%" PRIu64
" %s/%d is filtered - no label (%p)",
subgrp->update_group->id, subgrp->id,
inet_ntop(p->family, &p->u.prefix,
buf, SU_ADDRSTRLEN),
p->prefixlen, &label);
return 0;
}
}
/* Do not send back route to sender. */
if (onlypeer && from == onlypeer) {
return 0;
}
/* Do not send the default route in the BGP table if the neighbor is
* configured for default-originate */
if (CHECK_FLAG(peer->af_flags[afi][safi],
PEER_FLAG_DEFAULT_ORIGINATE)) {
if (p->family == AF_INET && p->u.prefix4.s_addr == INADDR_ANY)
return 0;
else if (p->family == AF_INET6 && p->prefixlen == 0)
return 0;
}
/* Transparency check. */
if (CHECK_FLAG(peer->af_flags[afi][safi], PEER_FLAG_RSERVER_CLIENT)
&& CHECK_FLAG(from->af_flags[afi][safi], PEER_FLAG_RSERVER_CLIENT))
transparent = 1;
else
transparent = 0;
/* If community is not disabled check the no-export and local. */
if (!transparent && bgp_community_filter(peer, piattr)) {
if (bgp_debug_update(NULL, p, subgrp->update_group, 0))
zlog_debug(
"subgrpannouncecheck: community filter check fail");
return 0;
}
/* If the attribute has originator-id and it is same as remote
peer's id. */
if (onlypeer && piattr->flag & ATTR_FLAG_BIT(BGP_ATTR_ORIGINATOR_ID)
&& (IPV4_ADDR_SAME(&onlypeer->remote_id, &piattr->originator_id))) {
if (bgp_debug_update(NULL, p, subgrp->update_group, 0))
zlog_debug(
"%s [Update:SEND] %s originator-id is same as "
"remote router-id",
onlypeer->host,
prefix2str(p, buf, sizeof(buf)));
return 0;
}
/* ORF prefix-list filter check */
if (CHECK_FLAG(peer->af_cap[afi][safi], PEER_CAP_ORF_PREFIX_RM_ADV)
&& (CHECK_FLAG(peer->af_cap[afi][safi], PEER_CAP_ORF_PREFIX_SM_RCV)
|| CHECK_FLAG(peer->af_cap[afi][safi],
PEER_CAP_ORF_PREFIX_SM_OLD_RCV)))
if (peer->orf_plist[afi][safi]) {
if (prefix_list_apply(peer->orf_plist[afi][safi], p)
== PREFIX_DENY) {
if (bgp_debug_update(NULL, p,
subgrp->update_group, 0))
zlog_debug(
"%s [Update:SEND] %s is filtered via ORF",
peer->host,
prefix2str(p, buf,
sizeof(buf)));
return 0;
}
}
/* Output filter check. */
if (bgp_output_filter(peer, p, piattr, afi, safi) == FILTER_DENY) {
if (bgp_debug_update(NULL, p, subgrp->update_group, 0))
zlog_debug("%s [Update:SEND] %s is filtered",
peer->host, prefix2str(p, buf, sizeof(buf)));
return 0;
}
#ifdef BGP_SEND_ASPATH_CHECK
/* AS path loop check. */
if (onlypeer && aspath_loop_check(piattr->aspath, onlypeer->as)) {
if (bgp_debug_update(NULL, p, subgrp->update_group, 0))
zlog_debug(
"%s [Update:SEND] suppress announcement to peer AS %u "
"that is part of AS path.",
onlypeer->host, onlypeer->as);
return 0;
}
#endif /* BGP_SEND_ASPATH_CHECK */
/* If we're a CONFED we need to loop check the CONFED ID too */
if (CHECK_FLAG(bgp->config, BGP_CONFIG_CONFEDERATION)) {
if (aspath_loop_check(piattr->aspath, bgp->confed_id)) {
if (bgp_debug_update(NULL, p, subgrp->update_group, 0))
zlog_debug(
"%s [Update:SEND] suppress announcement to peer AS %u"
" is AS path.",
peer->host, bgp->confed_id);
return 0;
}
}
/* Route-Reflect check. */
if (from->sort == BGP_PEER_IBGP && peer->sort == BGP_PEER_IBGP)
reflect = 1;
else
reflect = 0;
/* IBGP reflection check. */
if (reflect && !samepeer_safe) {
/* A route from a Client peer. */
if (CHECK_FLAG(from->af_flags[afi][safi],
PEER_FLAG_REFLECTOR_CLIENT)) {
/* Reflect to all the Non-Client peers and also to the
Client peers other than the originator. The originator
check is already done, so there is nothing to do. */
/* no bgp client-to-client reflection check. */
if (bgp_flag_check(bgp, BGP_FLAG_NO_CLIENT_TO_CLIENT))
if (CHECK_FLAG(peer->af_flags[afi][safi],
PEER_FLAG_REFLECTOR_CLIENT))
return 0;
} else {
/* A route from a Non-client peer. Reflect to all other
clients. */
if (!CHECK_FLAG(peer->af_flags[afi][safi],
PEER_FLAG_REFLECTOR_CLIENT))
return 0;
}
}
/* For attribute modification, copy the attribute to a temporary structure. */
bgp_attr_dup(attr, piattr);
/* If local-preference is not set. */
if ((peer->sort == BGP_PEER_IBGP || peer->sort == BGP_PEER_CONFED)
&& (!(attr->flag & ATTR_FLAG_BIT(BGP_ATTR_LOCAL_PREF)))) {
attr->flag |= ATTR_FLAG_BIT(BGP_ATTR_LOCAL_PREF);
attr->local_pref = bgp->default_local_pref;
}
/* If originator-id is not set and the route is to be reflected,
set the originator id */
if (reflect
&& (!(attr->flag & ATTR_FLAG_BIT(BGP_ATTR_ORIGINATOR_ID)))) {
IPV4_ADDR_COPY(&(attr->originator_id), &(from->remote_id));
SET_FLAG(attr->flag, ATTR_FLAG_BIT(BGP_ATTR_ORIGINATOR_ID));
}
/* Remove MED if it's an EBGP peer - will get overwritten by route-maps
 */
if (peer->sort == BGP_PEER_EBGP
&& attr->flag & ATTR_FLAG_BIT(BGP_ATTR_MULTI_EXIT_DISC)) {
if (from != bgp->peer_self && !transparent
&& !CHECK_FLAG(peer->af_flags[afi][safi],
PEER_FLAG_MED_UNCHANGED))
attr->flag &=
~(ATTR_FLAG_BIT(BGP_ATTR_MULTI_EXIT_DISC));
}
/* Since the nexthop attribute can vary per peer, it is not explicitly
 * set in announce check; only certain flags and length (or number of
 * nexthops -- for IPv6/MP_REACH) are set here in order to guide the
 * update formation code in setting the nexthop(s) on a per-peer basis
 * in reformat_peer(). Typically, the source nexthop in the attribute is
 * preserved, but in the scenarios where we know it will always be
 * overwritten, we reset the nexthop to "0" in an attempt to achieve
 * better Update packing. An example of this is when a prefix from each
 * of 2 IBGP peers needs to be announced to an EBGP peer (and they have
 * the same attributes barring their nexthop).
 */
if (reflect)
SET_FLAG(attr->rmap_change_flags, BATTR_REFLECTED);
#define NEXTHOP_IS_V6 \
((safi != SAFI_ENCAP && safi != SAFI_MPLS_VPN \
&& (p->family == AF_INET6 || peer_cap_enhe(peer, afi, safi))) \
|| ((safi == SAFI_ENCAP || safi == SAFI_MPLS_VPN) \
&& attr->mp_nexthop_len >= IPV6_MAX_BYTELEN))
/* IPv6/MP starts with 1 nexthop. The link-local address is passed only
 * if the peer (group) is configured to receive the link-local nexthop
 * unchanged and it is available in the prefix, OR we're not reflecting
 * the route and the peer (group) to whom we're going to announce is on
 * a shared network and this is either a self-originated route or the
 * peer is EBGP.
 */
if (NEXTHOP_IS_V6) {
attr->mp_nexthop_len = BGP_ATTR_NHLEN_IPV6_GLOBAL;
if ((CHECK_FLAG(peer->af_flags[afi][safi],
PEER_FLAG_NEXTHOP_LOCAL_UNCHANGED)
&& IN6_IS_ADDR_LINKLOCAL(&attr->mp_nexthop_local))
|| (!reflect && peer->shared_network
&& (from == bgp->peer_self
|| peer->sort == BGP_PEER_EBGP))) {
attr->mp_nexthop_len =
BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL;
}
/* Clear off the link-local nexthop in the source whenever it is
 * not needed, to ensure more prefixes share the same attribute
 * for announcement.
 */
if (!(CHECK_FLAG(peer->af_flags[afi][safi],
PEER_FLAG_NEXTHOP_LOCAL_UNCHANGED)))
memset(&attr->mp_nexthop_local, 0, IPV6_MAX_BYTELEN);
}
bgp_peer_remove_private_as(bgp, afi, safi, peer, attr);
bgp_peer_as_override(bgp, afi, safi, peer, attr);
/* Route map & unsuppress-map apply. */
if (ROUTE_MAP_OUT_NAME(filter) || (pi->extra && pi->extra->suppress)) {
struct bgp_path_info rmap_path;
struct bgp_path_info_extra dummy_rmap_path_extra;
struct attr dummy_attr;
memset(&rmap_path, 0, sizeof(struct bgp_path_info));
rmap_path.peer = peer;
rmap_path.attr = attr;
if (pi->extra) {
memcpy(&dummy_rmap_path_extra, pi->extra,
sizeof(struct bgp_path_info_extra));
rmap_path.extra = &dummy_rmap_path_extra;
}
/* don't confuse inbound and outbound setting */
RESET_FLAG(attr->rmap_change_flags);
/*
* The route reflector is not allowed to modify the attributes
* of the reflected IBGP routes unless explicitly allowed.
*/
if ((from->sort == BGP_PEER_IBGP && peer->sort == BGP_PEER_IBGP)
&& !bgp_flag_check(bgp,
BGP_FLAG_RR_ALLOW_OUTBOUND_POLICY)) {
bgp_attr_dup(&dummy_attr, attr);
rmap_path.attr = &dummy_attr;
}
SET_FLAG(peer->rmap_type, PEER_RMAP_TYPE_OUT);
if (pi->extra && pi->extra->suppress)
ret = route_map_apply(UNSUPPRESS_MAP(filter), p,
RMAP_BGP, &rmap_path);
else
ret = route_map_apply(ROUTE_MAP_OUT(filter), p,
RMAP_BGP, &rmap_path);
peer->rmap_type = 0;
if (ret == RMAP_DENYMATCH) {
bgp_attr_flush(attr);
return 0;
}
}
/* RFC 8212 to prevent route leaks.
* This specification intends to improve this situation by requiring the
* explicit configuration of both BGP Import and Export Policies for any
* External BGP (EBGP) session such as customers, peers, or
* confederation boundaries for all enabled address families. Through
* codification of the aforementioned requirement, operators will
* benefit from consistent behavior across different BGP
* implementations.
*/
if (peer->bgp->ebgp_requires_policy
== DEFAULT_EBGP_POLICY_ENABLED)
if (!bgp_outbound_policy_exists(peer, filter))
return 0;
if (bgp_flag_check(bgp, BGP_FLAG_GRACEFUL_SHUTDOWN)) {
if (peer->sort == BGP_PEER_IBGP
|| peer->sort == BGP_PEER_CONFED) {
attr->flag |= ATTR_FLAG_BIT(BGP_ATTR_LOCAL_PREF);
attr->local_pref = BGP_GSHUT_LOCAL_PREF;
} else {
bgp_attr_add_gshut_community(attr);
}
}
/* After route-map has been applied, we check to see if the nexthop to
 * be carried in the attribute (that is used for the announcement) can
 * be cleared off or not. We do this in all cases where we would be
 * setting the nexthop to "ourselves". For IPv6, we only need to
 * consider the global nexthop here; the link-local nexthop would have
 * been cleared already, and if not, it is required by the update
 * formation code. Also see earlier comments in this function.
 */
/*
 * If the route-map has performed some operation on the nexthop or the
 * peer configuration says to pass it unchanged, we cannot reset the
 * nexthop here, so only attempt to do it if these aren't true. Note
 * that the route-map handler itself might have cleared the nexthop, if
 * for example it is configured as 'peer-address'.
 */
if (!bgp_rmap_nhop_changed(attr->rmap_change_flags,
piattr->rmap_change_flags)
&& !transparent
&& !CHECK_FLAG(peer->af_flags[afi][safi],
PEER_FLAG_NEXTHOP_UNCHANGED)) {
/* We can reset the nexthop, if setting (or forcing) it to
* 'self' */
if (CHECK_FLAG(peer->af_flags[afi][safi],
PEER_FLAG_NEXTHOP_SELF)
|| CHECK_FLAG(peer->af_flags[afi][safi],
PEER_FLAG_FORCE_NEXTHOP_SELF)) {
if (!reflect
|| CHECK_FLAG(peer->af_flags[afi][safi],
PEER_FLAG_FORCE_NEXTHOP_SELF))
subgroup_announce_reset_nhop(
(peer_cap_enhe(peer, afi, safi)
? AF_INET6
: p->family),
attr);
} else if (peer->sort == BGP_PEER_EBGP) {
/* Can also reset the nexthop if announcing to EBGP, but only
 * if no peer in the subgroup is on a shared subnet.
 * Note: 3rd party nexthop currently implemented for
 * IPv4 only.
 */
if (!bgp_subgrp_multiaccess_check_v4(piattr->nexthop,
subgrp))
subgroup_announce_reset_nhop(
(peer_cap_enhe(peer, afi, safi)
? AF_INET6
: p->family),
attr);
} else if (CHECK_FLAG(pi->flags, BGP_PATH_ANNC_NH_SELF)) {
/*
* This flag is used for leaked vpn-vrf routes
*/
int family = p->family;
if (peer_cap_enhe(peer, afi, safi))
family = AF_INET6;
if (bgp_debug_update(NULL, p, subgrp->update_group, 0))
zlog_debug(
"%s: BGP_PATH_ANNC_NH_SELF, family=%s",
__func__, family2str(family));
subgroup_announce_reset_nhop(family, attr);
}
/* If IPv6/MP and the nexthop does not have any override and
 * happens to be a link-local address, reset it so that we don't
 * pass along the source's link-local IPv6 address to recipients
 * who may not be on the same interface.
 */
if (p->family == AF_INET6 || peer_cap_enhe(peer, afi, safi)) {
if (IN6_IS_ADDR_LINKLOCAL(&attr->mp_nexthop_global))
subgroup_announce_reset_nhop(AF_INET6, attr);
}
}
return 1;
}
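/* Run bestpath selection across all paths on the node, honoring
 * deterministic-MED when configured, and compute the multipath set.
 * result->old and result->new carry the previous and newly selected
 * bestpath respectively.
 */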
void bgp_best_selection(struct bgp *bgp, struct bgp_node *rn,
struct bgp_maxpaths_cfg *mpath_cfg,
struct bgp_path_info_pair *result, afi_t afi,
safi_t safi)
{
struct bgp_path_info *new_select;
struct bgp_path_info *old_select;
struct bgp_path_info *pi;
struct bgp_path_info *pi1;
struct bgp_path_info *pi2;
struct bgp_path_info *nextpi = NULL;
int paths_eq, do_mpath, debug;
struct list mp_list;
char pfx_buf[PREFIX2STR_BUFFER];
char path_buf[PATH_ADDPATH_STR_BUFFER];
bgp_mp_list_init(&mp_list);
do_mpath =
(mpath_cfg->maxpaths_ebgp > 1 || mpath_cfg->maxpaths_ibgp > 1);
debug = bgp_debug_bestpath(&rn->p);
if (debug)
prefix2str(&rn->p, pfx_buf, sizeof(pfx_buf));
/* bgp deterministic-med */
new_select = NULL;
if (bgp_flag_check(bgp, BGP_FLAG_DETERMINISTIC_MED)) {
/* Clear BGP_PATH_DMED_SELECTED for all paths */
for (pi1 = bgp_node_get_bgp_path_info(rn); pi1;
pi1 = pi1->next)
bgp_path_info_unset_flag(rn, pi1,
BGP_PATH_DMED_SELECTED);
for (pi1 = bgp_node_get_bgp_path_info(rn); pi1;
pi1 = pi1->next) {
if (CHECK_FLAG(pi1->flags, BGP_PATH_DMED_CHECK))
continue;
if (BGP_PATH_HOLDDOWN(pi1))
continue;
if (pi1->peer && pi1->peer != bgp->peer_self)
if (pi1->peer->status != Established)
continue;
new_select = pi1;
if (pi1->next) {
for (pi2 = pi1->next; pi2; pi2 = pi2->next) {
if (CHECK_FLAG(pi2->flags,
BGP_PATH_DMED_CHECK))
continue;
if (BGP_PATH_HOLDDOWN(pi2))
continue;
if (pi2->peer
&& pi2->peer != bgp->peer_self
&& !CHECK_FLAG(
pi2->peer->sflags,
PEER_STATUS_NSF_WAIT))
if (pi2->peer->status
!= Established)
continue;
if (!aspath_cmp_left(pi1->attr->aspath,
pi2->attr->aspath)
&& !aspath_cmp_left_confed(
pi1->attr->aspath,
pi2->attr->aspath))
continue;
if (bgp_path_info_cmp(
bgp, pi2, new_select,
&paths_eq, mpath_cfg, debug,
pfx_buf, afi, safi)) {
bgp_path_info_unset_flag(
rn, new_select,
BGP_PATH_DMED_SELECTED);
new_select = pi2;
}
bgp_path_info_set_flag(
rn, pi2, BGP_PATH_DMED_CHECK);
}
}
bgp_path_info_set_flag(rn, new_select,
BGP_PATH_DMED_CHECK);
bgp_path_info_set_flag(rn, new_select,
BGP_PATH_DMED_SELECTED);
if (debug) {
bgp_path_info_path_with_addpath_rx_str(
new_select, path_buf);
zlog_debug("%s: %s is the bestpath from AS %u",
pfx_buf, path_buf,
aspath_get_first_as(
new_select->attr->aspath));
}
}
}
/* Check old selected route and new selected route. */
old_select = NULL;
new_select = NULL;
for (pi = bgp_node_get_bgp_path_info(rn);
(pi != NULL) && (nextpi = pi->next, 1); pi = nextpi) {
if (CHECK_FLAG(pi->flags, BGP_PATH_SELECTED))
old_select = pi;
if (BGP_PATH_HOLDDOWN(pi)) {
/* reap REMOVED routes, if need be; the selected route must
 * stay for a while longer though
 */
if (CHECK_FLAG(pi->flags, BGP_PATH_REMOVED)
&& (pi != old_select))
bgp_path_info_reap(rn, pi);
if (debug)
zlog_debug("%s: pi %p in holddown", __func__,
pi);
continue;
}
if (pi->peer && pi->peer != bgp->peer_self
&& !CHECK_FLAG(pi->peer->sflags, PEER_STATUS_NSF_WAIT))
if (pi->peer->status != Established) {
if (debug)
zlog_debug(
"%s: pi %p non self peer %s not estab state",
__func__, pi, pi->peer->host);
continue;
}
if (bgp_flag_check(bgp, BGP_FLAG_DETERMINISTIC_MED)
&& (!CHECK_FLAG(pi->flags, BGP_PATH_DMED_SELECTED))) {
bgp_path_info_unset_flag(rn, pi, BGP_PATH_DMED_CHECK);
if (debug)
zlog_debug("%s: pi %p dmed", __func__, pi);
continue;
}
bgp_path_info_unset_flag(rn, pi, BGP_PATH_DMED_CHECK);
if (bgp_path_info_cmp(bgp, pi, new_select, &paths_eq, mpath_cfg,
debug, pfx_buf, afi, safi)) {
new_select = pi;
}
}
/* Now that we know which path is the bestpath, see if any of the
 * other paths qualify as multipaths.
 */
if (debug) {
if (new_select)
bgp_path_info_path_with_addpath_rx_str(new_select,
path_buf);
else
sprintf(path_buf, "NONE");
zlog_debug(
"%s: After path selection, newbest is %s oldbest was %s",
pfx_buf, path_buf,
old_select ? old_select->peer->host : "NONE");
}
if (do_mpath && new_select) {
for (pi = bgp_node_get_bgp_path_info(rn);
(pi != NULL) && (nextpi = pi->next, 1); pi = nextpi) {
if (debug)
bgp_path_info_path_with_addpath_rx_str(
pi, path_buf);
if (pi == new_select) {
if (debug)
zlog_debug(
"%s: %s is the bestpath, add to the multipath list",
pfx_buf, path_buf);
bgp_mp_list_add(&mp_list, pi);
continue;
}
if (BGP_PATH_HOLDDOWN(pi))
continue;
if (pi->peer && pi->peer != bgp->peer_self
&& !CHECK_FLAG(pi->peer->sflags,
PEER_STATUS_NSF_WAIT))
if (pi->peer->status != Established)
continue;
if (!bgp_path_info_nexthop_cmp(pi, new_select)) {
if (debug)
zlog_debug(
"%s: %s has the same nexthop as the bestpath, skip it",
pfx_buf, path_buf);
continue;
}
bgp_path_info_cmp(bgp, pi, new_select, &paths_eq,
mpath_cfg, debug, pfx_buf, afi, safi);
if (paths_eq) {
if (debug)
zlog_debug(
"%s: %s is equivalent to the bestpath, add to the multipath list",
pfx_buf, path_buf);
bgp_mp_list_add(&mp_list, pi);
}
}
}
bgp_path_info_mpath_update(rn, new_select, old_select, &mp_list,
mpath_cfg);
bgp_path_info_mpath_aggregate_update(new_select, old_select);
bgp_mp_list_clear(&mp_list);
bgp_addpath_update_ids(bgp, rn, afi, safi);
result->old = old_select;
result->new = new_select;
return;
}
/*
* A new route/change in bestpath of an existing route. Evaluate the path
* for advertisement to the subgroup.
*/
int subgroup_process_announce_selected(struct update_subgroup *subgrp,
struct bgp_path_info *selected,
struct bgp_node *rn,
uint32_t addpath_tx_id)
{
struct prefix *p;
struct peer *onlypeer;
struct attr attr;
afi_t afi;
safi_t safi;
p = &rn->p;
afi = SUBGRP_AFI(subgrp);
safi = SUBGRP_SAFI(subgrp);
onlypeer = ((SUBGRP_PCOUNT(subgrp) == 1) ? (SUBGRP_PFIRST(subgrp))->peer
: NULL);
if (BGP_DEBUG(update, UPDATE_OUT)) {
char buf_prefix[PREFIX_STRLEN];
prefix2str(p, buf_prefix, sizeof(buf_prefix));
zlog_debug("%s: p=%s, selected=%p", __func__, buf_prefix,
selected);
}
/* First update is deferred until ORF or ROUTE-REFRESH is received */
if (onlypeer && CHECK_FLAG(onlypeer->af_sflags[afi][safi],
PEER_STATUS_ORF_WAIT_REFRESH))
return 0;
memset(&attr, 0, sizeof(struct attr));
/* 'attr' is initialized in subgroup_announce_check() */
/* Announcement to the subgroup. If the route is filtered withdraw it.
*/
if (selected) {
if (subgroup_announce_check(rn, selected, subgrp, p, &attr))
bgp_adj_out_set_subgroup(rn, subgrp, &attr, selected);
else
bgp_adj_out_unset_subgroup(rn, subgrp, 1,
addpath_tx_id);
}
/* If selected is NULL we must withdraw the path using addpath_tx_id */
else {
bgp_adj_out_unset_subgroup(rn, subgrp, 1, addpath_tx_id);
}
return 0;
}
/*
* Clear IGP changed flag and attribute changed flag for a route (all paths).
* This is called at the end of route processing.
*/
void bgp_zebra_clear_route_change_flags(struct bgp_node *rn)
{
struct bgp_path_info *pi;
for (pi = bgp_node_get_bgp_path_info(rn); pi; pi = pi->next) {
if (BGP_PATH_HOLDDOWN(pi))
continue;
UNSET_FLAG(pi->flags, BGP_PATH_IGP_CHANGED);
UNSET_FLAG(pi->flags, BGP_PATH_ATTR_CHANGED);
}
}
/*
* Has the route changed from the RIB's perspective? This is invoked only
* if the route selection returns the same best route as earlier - to
* determine if we need to update zebra or not.
*/
int bgp_zebra_has_route_changed(struct bgp_node *rn,
struct bgp_path_info *selected)
{
struct bgp_path_info *mpinfo;
/* If this is multipath, check all selected paths for any nexthop
* change or attribute change. Some attribute changes (e.g., community)
* aren't of relevance to the RIB, but we'll update zebra to ensure
* we handle the case of BGP nexthop change. This is the behavior
* when the best path has an attribute change anyway.
*/
if (CHECK_FLAG(selected->flags, BGP_PATH_IGP_CHANGED)
|| CHECK_FLAG(selected->flags, BGP_PATH_MULTIPATH_CHG))
return 1;
/*
* If this is multipath, check all selected paths for any nexthop change
*/
for (mpinfo = bgp_path_info_mpath_first(selected); mpinfo;
mpinfo = bgp_path_info_mpath_next(mpinfo)) {
if (CHECK_FLAG(mpinfo->flags, BGP_PATH_IGP_CHANGED)
|| CHECK_FLAG(mpinfo->flags, BGP_PATH_ATTR_CHANGED))
return 1;
}
/* Nothing has changed from the RIB's perspective. */
return 0;
}
struct bgp_process_queue {
struct bgp *bgp;
STAILQ_HEAD(, bgp_node) pqueue;
#define BGP_PROCESS_QUEUE_EOIU_MARKER (1 << 0)
unsigned int flags;
unsigned int queued;
};
/*
* old_select = The old best path
* new_select = the new best path
*
* if (!old_select && new_select)
* We are sending new information on.
*
* if (old_select && new_select) {
* if (new_select != old_select)
* We have a new best path send a change
* else
 *		We've received an update with new attributes that needs
 *		to be passed on.
* }
*
* if (old_select && !new_select)
* We have no eligible route that we can announce or the rn
* is being removed.
*/
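/* A minimal illustrative sketch (never compiled) of the decision table
 * above, expressed as a pure classifier. The enum and function names are
 * hypothetical and are not part of bgpd.
 */
#if 0
enum bestpath_event {
	BESTPATH_NEW,	    /* !old_select && new_select */
	BESTPATH_CHANGED,   /* old_select && new_select && old != new */
	BESTPATH_UPDATED,   /* old_select && new_select && old == new */
	BESTPATH_WITHDRAWN, /* old_select && !new_select */
	BESTPATH_NONE,	    /* !old_select && !new_select */
};

static enum bestpath_event classify_bestpath(struct bgp_path_info *old_select,
					     struct bgp_path_info *new_select)
{
	if (!old_select)
		return new_select ? BESTPATH_NEW : BESTPATH_NONE;
	if (!new_select)
		return BESTPATH_WITHDRAWN;
	return old_select == new_select ? BESTPATH_UPDATED : BESTPATH_CHANGED;
}
#endif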
static void bgp_process_main_one(struct bgp *bgp, struct bgp_node *rn,
afi_t afi, safi_t safi)
{
struct bgp_path_info *new_select;
struct bgp_path_info *old_select;
struct bgp_path_info_pair old_and_new;
char pfx_buf[PREFIX2STR_BUFFER];
int debug = 0;
/* Is it end of initial update? (after startup) */
if (!rn) {
quagga_timestamp(3, bgp->update_delay_zebra_resume_time,
sizeof(bgp->update_delay_zebra_resume_time));
bgp->main_zebra_update_hold = 0;
FOREACH_AFI_SAFI (afi, safi) {
if (bgp_fibupd_safi(safi))
bgp_zebra_announce_table(bgp, afi, safi);
}
bgp->main_peers_update_hold = 0;
bgp_start_routeadv(bgp);
return;
}
struct prefix *p = &rn->p;
debug = bgp_debug_bestpath(&rn->p);
if (debug) {
prefix2str(&rn->p, pfx_buf, sizeof(pfx_buf));
zlog_debug("%s: p=%s afi=%s, safi=%s start", __func__, pfx_buf,
afi2str(afi), safi2str(safi));
}
/* Best path selection. */
bgp_best_selection(bgp, rn, &bgp->maxpaths[afi][safi], &old_and_new,
afi, safi);
old_select = old_and_new.old;
new_select = old_and_new.new;
/* Do we need to allocate or free labels?
* Right now, since we only deal with per-prefix labels, it is not
* necessary to do this upon changes to best path. Exceptions:
* - label index has changed -> recalculate resulting label
* - path_info sub_type changed -> switch to/from implicit-null
* - no valid label (due to removed static label binding) -> get new one
*/
if (bgp->allocate_mpls_labels[afi][safi]) {
if (new_select) {
if (!old_select
|| bgp_label_index_differs(new_select, old_select)
|| new_select->sub_type != old_select->sub_type
|| !bgp_is_valid_label(&rn->local_label)) {
/* Enforced penultimate hop popping:
* implicit-null for local routes, aggregate
* and redistributed routes
*/
if (new_select->sub_type == BGP_ROUTE_STATIC
|| new_select->sub_type
== BGP_ROUTE_AGGREGATE
|| new_select->sub_type
== BGP_ROUTE_REDISTRIBUTE) {
if (CHECK_FLAG(
rn->flags,
BGP_NODE_REGISTERED_FOR_LABEL))
bgp_unregister_for_label(rn);
label_ntop(MPLS_LABEL_IMPLICIT_NULL, 1,
&rn->local_label);
bgp_set_valid_label(&rn->local_label);
} else
bgp_register_for_label(rn, new_select);
}
} else if (CHECK_FLAG(rn->flags,
BGP_NODE_REGISTERED_FOR_LABEL)) {
bgp_unregister_for_label(rn);
}
} else if (CHECK_FLAG(rn->flags, BGP_NODE_REGISTERED_FOR_LABEL)) {
bgp_unregister_for_label(rn);
}
if (debug) {
prefix2str(&rn->p, pfx_buf, sizeof(pfx_buf));
zlog_debug(
"%s: p=%s afi=%s, safi=%s, old_select=%p, new_select=%p",
__func__, pfx_buf, afi2str(afi), safi2str(safi),
old_select, new_select);
}
/* If best route remains the same and this is not due to user-initiated
* clear, see exactly what needs to be done.
*/
if (old_select && old_select == new_select
&& !CHECK_FLAG(rn->flags, BGP_NODE_USER_CLEAR)
&& !CHECK_FLAG(old_select->flags, BGP_PATH_ATTR_CHANGED)
&& !bgp_addpath_is_addpath_used(&bgp->tx_addpath, afi, safi)) {
if (bgp_zebra_has_route_changed(rn, old_select)) {
#if ENABLE_BGP_VNC
vnc_import_bgp_add_route(bgp, p, old_select);
vnc_import_bgp_exterior_add_route(bgp, p, old_select);
#endif
if (bgp_fibupd_safi(safi)
&& !bgp_option_check(BGP_OPT_NO_FIB)) {
if (new_select->type == ZEBRA_ROUTE_BGP
&& (new_select->sub_type == BGP_ROUTE_NORMAL
|| new_select->sub_type
== BGP_ROUTE_IMPORTED))
bgp_zebra_announce(rn, p, old_select,
bgp, afi, safi);
}
}
UNSET_FLAG(old_select->flags, BGP_PATH_MULTIPATH_CHG);
bgp_zebra_clear_route_change_flags(rn);
/* If there is a change of interest to peers, reannounce the
* route. */
if (CHECK_FLAG(old_select->flags, BGP_PATH_ATTR_CHANGED)
|| CHECK_FLAG(rn->flags, BGP_NODE_LABEL_CHANGED)) {
group_announce_route(bgp, afi, safi, rn, new_select);
			/* unicast routes must also be announced to
			 * labeled-unicast update-groups */
if (safi == SAFI_UNICAST)
group_announce_route(bgp, afi,
SAFI_LABELED_UNICAST, rn,
new_select);
UNSET_FLAG(old_select->flags, BGP_PATH_ATTR_CHANGED);
UNSET_FLAG(rn->flags, BGP_NODE_LABEL_CHANGED);
}
UNSET_FLAG(rn->flags, BGP_NODE_PROCESS_SCHEDULED);
return;
}
/* If the user did "clear ip bgp prefix x.x.x.x" this flag will be set
*/
UNSET_FLAG(rn->flags, BGP_NODE_USER_CLEAR);
/* bestpath has changed; bump version */
if (old_select || new_select) {
bgp_bump_version(rn);
if (!bgp->t_rmap_def_originate_eval) {
bgp_lock(bgp);
thread_add_timer(
bm->master,
update_group_refresh_default_originate_route_map,
bgp, RMAP_DEFAULT_ORIGINATE_EVAL_TIMER,
&bgp->t_rmap_def_originate_eval);
}
}
if (old_select)
bgp_path_info_unset_flag(rn, old_select, BGP_PATH_SELECTED);
if (new_select) {
if (debug)
zlog_debug("%s: setting SELECTED flag", __func__);
bgp_path_info_set_flag(rn, new_select, BGP_PATH_SELECTED);
bgp_path_info_unset_flag(rn, new_select, BGP_PATH_ATTR_CHANGED);
UNSET_FLAG(new_select->flags, BGP_PATH_MULTIPATH_CHG);
}
#if ENABLE_BGP_VNC
if ((afi == AFI_IP || afi == AFI_IP6) && (safi == SAFI_UNICAST)) {
if (old_select != new_select) {
if (old_select) {
vnc_import_bgp_exterior_del_route(bgp, p,
old_select);
vnc_import_bgp_del_route(bgp, p, old_select);
}
if (new_select) {
vnc_import_bgp_exterior_add_route(bgp, p,
new_select);
vnc_import_bgp_add_route(bgp, p, new_select);
}
}
}
#endif
group_announce_route(bgp, afi, safi, rn, new_select);
	/* unicast routes must also be announced to labeled-unicast
	 * update-groups
	 */
if (safi == SAFI_UNICAST)
group_announce_route(bgp, afi, SAFI_LABELED_UNICAST, rn,
new_select);
/* FIB update. */
if (bgp_fibupd_safi(safi) && (bgp->inst_type != BGP_INSTANCE_TYPE_VIEW)
&& !bgp_option_check(BGP_OPT_NO_FIB)) {
if (new_select && new_select->type == ZEBRA_ROUTE_BGP
&& (new_select->sub_type == BGP_ROUTE_NORMAL
|| new_select->sub_type == BGP_ROUTE_AGGREGATE
|| new_select->sub_type == BGP_ROUTE_IMPORTED)) {
			/* If this is an EVPN-imported type-5 prefix,
			 * withdraw the route first to clear the nexthop
			 * neighbor and the RMAC entry.
			 */
if (old_select &&
is_route_parent_evpn(old_select))
bgp_zebra_withdraw(p, old_select, bgp, safi);
bgp_zebra_announce(rn, p, new_select, bgp, afi, safi);
} else {
/* Withdraw the route from the kernel. */
if (old_select && old_select->type == ZEBRA_ROUTE_BGP
&& (old_select->sub_type == BGP_ROUTE_NORMAL
|| old_select->sub_type == BGP_ROUTE_AGGREGATE
|| old_select->sub_type == BGP_ROUTE_IMPORTED))
bgp_zebra_withdraw(p, old_select, bgp, safi);
}
}
/* advertise/withdraw type-5 routes */
if ((afi == AFI_IP || afi == AFI_IP6) && (safi == SAFI_UNICAST)) {
if (advertise_type5_routes(bgp, afi) &&
new_select &&
is_route_injectable_into_evpn(new_select)) {
/* apply the route-map */
if (bgp->adv_cmd_rmap[afi][safi].map) {
int ret = 0;
ret = route_map_apply(
bgp->adv_cmd_rmap[afi][safi].map,
&rn->p, RMAP_BGP, new_select);
if (ret == RMAP_MATCH)
bgp_evpn_advertise_type5_route(
bgp, &rn->p, new_select->attr,
afi, safi);
else
bgp_evpn_withdraw_type5_route(
bgp, &rn->p, afi, safi);
} else {
bgp_evpn_advertise_type5_route(bgp,
&rn->p,
new_select->attr,
afi, safi);
}
} else if (advertise_type5_routes(bgp, afi) &&
old_select &&
is_route_injectable_into_evpn(old_select))
bgp_evpn_withdraw_type5_route(bgp, &rn->p, afi, safi);
}
/* Clear any route change flags. */
bgp_zebra_clear_route_change_flags(rn);
/* Reap old select bgp_path_info, if it has been removed */
if (old_select && CHECK_FLAG(old_select->flags, BGP_PATH_REMOVED))
bgp_path_info_reap(rn, old_select);
UNSET_FLAG(rn->flags, BGP_NODE_PROCESS_SCHEDULED);
return;
}
static wq_item_status bgp_process_wq(struct work_queue *wq, void *data)
{
struct bgp_process_queue *pqnode = data;
struct bgp *bgp = pqnode->bgp;
struct bgp_table *table;
struct bgp_node *rn;
	/* EOIU marker */
if (CHECK_FLAG(pqnode->flags, BGP_PROCESS_QUEUE_EOIU_MARKER)) {
bgp_process_main_one(bgp, NULL, 0, 0);
/* should always have dedicated wq call */
assert(STAILQ_FIRST(&pqnode->pqueue) == NULL);
return WQ_SUCCESS;
}
while (!STAILQ_EMPTY(&pqnode->pqueue)) {
rn = STAILQ_FIRST(&pqnode->pqueue);
STAILQ_REMOVE_HEAD(&pqnode->pqueue, pq);
STAILQ_NEXT(rn, pq) = NULL; /* complete unlink */
table = bgp_node_table(rn);
/* note, new RNs may be added as part of processing */
bgp_process_main_one(bgp, rn, table->afi, table->safi);
bgp_unlock_node(rn);
bgp_table_unlock(table);
}
return WQ_SUCCESS;
}
static void bgp_processq_del(struct work_queue *wq, void *data)
{
struct bgp_process_queue *pqnode = data;
bgp_unlock(pqnode->bgp);
XFREE(MTYPE_BGP_PROCESS_QUEUE, pqnode);
}
void bgp_process_queue_init(void)
{
if (!bm->process_main_queue)
bm->process_main_queue =
work_queue_new(bm->master, "process_main_queue");
bm->process_main_queue->spec.workfunc = &bgp_process_wq;
bm->process_main_queue->spec.del_item_data = &bgp_processq_del;
bm->process_main_queue->spec.max_retries = 0;
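	/* Hold time (in ms) before a newly added queue item is first run */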
bm->process_main_queue->spec.hold = 50;
/* Use a higher yield value of 50ms for main queue processing */
bm->process_main_queue->spec.yield = 50 * 1000L;
}
static struct bgp_process_queue *bgp_processq_alloc(struct bgp *bgp)
{
struct bgp_process_queue *pqnode;
pqnode = XCALLOC(MTYPE_BGP_PROCESS_QUEUE,
sizeof(struct bgp_process_queue));
/* unlocked in bgp_processq_del */
pqnode->bgp = bgp_lock(bgp);
STAILQ_INIT(&pqnode->pqueue);
return pqnode;
}
void bgp_process(struct bgp *bgp, struct bgp_node *rn, afi_t afi, safi_t safi)
{
#define ARBITRARY_PROCESS_QLEN 10000
struct work_queue *wq = bm->process_main_queue;
struct bgp_process_queue *pqnode;
int pqnode_reuse = 0;
/* already scheduled for processing? */
if (CHECK_FLAG(rn->flags, BGP_NODE_PROCESS_SCHEDULED))
return;
if (wq == NULL)
return;
	/* Add route nodes to an existing work queue item, up to the limit,
	 * but only if the item is from the same BGP view and is not an EOIU
	 * marker.
	 */
if (work_queue_item_count(wq)) {
struct work_queue_item *item = work_queue_last_item(wq);
pqnode = item->data;
if (CHECK_FLAG(pqnode->flags, BGP_PROCESS_QUEUE_EOIU_MARKER)
|| pqnode->bgp != bgp
|| pqnode->queued >= ARBITRARY_PROCESS_QLEN)
pqnode = bgp_processq_alloc(bgp);
else
pqnode_reuse = 1;
} else
pqnode = bgp_processq_alloc(bgp);
/* all unlocked in bgp_process_wq */
bgp_table_lock(bgp_node_table(rn));
SET_FLAG(rn->flags, BGP_NODE_PROCESS_SCHEDULED);
bgp_lock_node(rn);
/* can't be enqueued twice */
assert(STAILQ_NEXT(rn, pq) == NULL);
STAILQ_INSERT_TAIL(&pqnode->pqueue, rn, pq);
pqnode->queued++;
if (!pqnode_reuse)
work_queue_add(wq, pqnode);
return;
}
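/* An end-of-initial-update (EOIU) marker travels the same work queue as
 * route nodes: bgp_process_wq() recognizes BGP_PROCESS_QUEUE_EOIU_MARKER
 * and calls bgp_process_main_one() with a NULL node, which lifts the
 * update-delay holds (see above).
 */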
void bgp_add_eoiu_mark(struct bgp *bgp)
{
struct bgp_process_queue *pqnode;
if (bm->process_main_queue == NULL)
return;
pqnode = bgp_processq_alloc(bgp);
SET_FLAG(pqnode->flags, BGP_PROCESS_QUEUE_EOIU_MARKER);
work_queue_add(bm->process_main_queue, pqnode);
}
static int bgp_maximum_prefix_restart_timer(struct thread *thread)
{
struct peer *peer;
peer = THREAD_ARG(thread);
peer->t_pmax_restart = NULL;
if (bgp_debug_neighbor_events(peer))
zlog_debug(
"%s Maximum-prefix restart timer expired, restore peering",
peer->host);
if ((peer_clear(peer, NULL) < 0) && bgp_debug_neighbor_events(peer))
zlog_debug("%s: %s peer_clear failed",
__PRETTY_FUNCTION__, peer->host);
return 0;
}
int bgp_maximum_prefix_overflow(struct peer *peer, afi_t afi, safi_t safi,
int always)
{
iana_afi_t pkt_afi;
iana_safi_t pkt_safi;
if (!CHECK_FLAG(peer->af_flags[afi][safi], PEER_FLAG_MAX_PREFIX))
return 0;
if (peer->pcount[afi][safi] > peer->pmax[afi][safi]) {
if (CHECK_FLAG(peer->af_sflags[afi][safi],
PEER_STATUS_PREFIX_LIMIT)
&& !always)
return 0;
zlog_info(
"%%MAXPFXEXCEED: No. of %s prefix received from %s %ld exceed, "
"limit %ld",
afi_safi_print(afi, safi), peer->host,
peer->pcount[afi][safi], peer->pmax[afi][safi]);
SET_FLAG(peer->af_sflags[afi][safi], PEER_STATUS_PREFIX_LIMIT);
if (CHECK_FLAG(peer->af_flags[afi][safi],
PEER_FLAG_MAX_PREFIX_WARNING))
return 0;
/* Convert AFI, SAFI to values for packet. */
pkt_afi = afi_int2iana(afi);
pkt_safi = safi_int2iana(safi);
{
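			/* Data portion of the CEASE/Maximum-Number-of-
			 * Prefixes-Reached NOTIFICATION (RFC 4486): AFI
			 * (2 octets), SAFI (1 octet) and the prefix limit
			 * (4 octets), in network byte order.
			 */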
uint8_t ndata[7];
ndata[0] = (pkt_afi >> 8);
ndata[1] = pkt_afi;
ndata[2] = pkt_safi;
ndata[3] = (peer->pmax[afi][safi] >> 24);
ndata[4] = (peer->pmax[afi][safi] >> 16);
ndata[5] = (peer->pmax[afi][safi] >> 8);
ndata[6] = (peer->pmax[afi][safi]);
SET_FLAG(peer->sflags, PEER_STATUS_PREFIX_OVERFLOW);
bgp_notify_send_with_data(peer, BGP_NOTIFY_CEASE,
BGP_NOTIFY_CEASE_MAX_PREFIX,
ndata, 7);
}
/* Dynamic peers will just close their connection. */
if (peer_dynamic_neighbor(peer))
return 1;
		/* Start the max-prefix restart timer. */
if (peer->pmax_restart[afi][safi]) {
peer->v_pmax_restart =
peer->pmax_restart[afi][safi] * 60;
if (bgp_debug_neighbor_events(peer))
zlog_debug(
"%s Maximum-prefix restart timer started for %d secs",
peer->host, peer->v_pmax_restart);
BGP_TIMER_ON(peer->t_pmax_restart,
bgp_maximum_prefix_restart_timer,
peer->v_pmax_restart);
}
return 1;
} else
UNSET_FLAG(peer->af_sflags[afi][safi],
PEER_STATUS_PREFIX_LIMIT);
if (peer->pcount[afi][safi]
> (peer->pmax[afi][safi] * peer->pmax_threshold[afi][safi] / 100)) {
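		/* Example: with a limit of 100 and a threshold of 75%, this
		 * warning fires once more than 75 prefixes are present. */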
if (CHECK_FLAG(peer->af_sflags[afi][safi],
PEER_STATUS_PREFIX_THRESHOLD)
&& !always)
return 0;
zlog_info(
"%%MAXPFX: No. of %s prefix received from %s reaches %ld, max %ld",
afi_safi_print(afi, safi), peer->host,
peer->pcount[afi][safi], peer->pmax[afi][safi]);
SET_FLAG(peer->af_sflags[afi][safi],
PEER_STATUS_PREFIX_THRESHOLD);
} else
UNSET_FLAG(peer->af_sflags[afi][safi],
PEER_STATUS_PREFIX_THRESHOLD);
return 0;
}
/* Unconditionally remove the route from the RIB, without taking
 * damping into consideration (e.g., because the session went down)
*/
void bgp_rib_remove(struct bgp_node *rn, struct bgp_path_info *pi,
struct peer *peer, afi_t afi, safi_t safi)
{
bgp_aggregate_decrement(peer->bgp, &rn->p, pi, afi, safi);
if (!CHECK_FLAG(pi->flags, BGP_PATH_HISTORY))
bgp_path_info_delete(rn, pi); /* keep historical info */
bgp_process(peer->bgp, rn, afi, safi);
}
static void bgp_rib_withdraw(struct bgp_node *rn, struct bgp_path_info *pi,
struct peer *peer, afi_t afi, safi_t safi,
struct prefix_rd *prd)
{
	/* Apply dampening; if the result is "suppressed", retain the
	 * bgp_path_info in the RIB for historical reference.
	 */
if (CHECK_FLAG(peer->bgp->af_flags[afi][safi], BGP_CONFIG_DAMPENING)
&& peer->sort == BGP_PEER_EBGP)
if ((bgp_damp_withdraw(pi, rn, afi, safi, 0))
== BGP_DAMP_SUPPRESSED) {
bgp_aggregate_decrement(peer->bgp, &rn->p, pi, afi,
safi);
return;
}
#if ENABLE_BGP_VNC
if (safi == SAFI_MPLS_VPN) {
struct bgp_node *prn = NULL;
struct bgp_table *table = NULL;
prn = bgp_node_get(peer->bgp->rib[afi][safi],
(struct prefix *)prd);
if (bgp_node_has_bgp_path_info_data(prn)) {
table = bgp_node_get_bgp_table_info(prn);
vnc_import_bgp_del_vnc_host_route_mode_resolve_nve(
peer->bgp, prd, table, &rn->p, pi);
}
bgp_unlock_node(prn);
}
if ((afi == AFI_IP || afi == AFI_IP6) && (safi == SAFI_UNICAST)) {
if (CHECK_FLAG(pi->flags, BGP_PATH_SELECTED)) {
vnc_import_bgp_del_route(peer->bgp, &rn->p, pi);
vnc_import_bgp_exterior_del_route(peer->bgp, &rn->p,
pi);
}
}
#endif
/* If this is an EVPN route, process for un-import. */
if (safi == SAFI_EVPN)
bgp_evpn_unimport_route(peer->bgp, afi, safi, &rn->p, pi);
bgp_rib_remove(rn, pi, peer, afi, safi);
}
struct bgp_path_info *info_make(int type, int sub_type, unsigned short instance,
struct peer *peer, struct attr *attr,
struct bgp_node *rn)
{
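	/* Note: callers pass an already-interned attr and register the
	 * result with bgp_path_info_add() (see bgp_update() below). */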
struct bgp_path_info *new;
/* Make new BGP info. */
new = XCALLOC(MTYPE_BGP_ROUTE, sizeof(struct bgp_path_info));
new->type = type;
new->instance = instance;
new->sub_type = sub_type;
new->peer = peer;
new->attr = attr;
new->uptime = bgp_clock();
new->net = rn;
return new;
}
static void overlay_index_update(struct attr *attr,
struct eth_segment_id *eth_s_id,
union gw_addr *gw_ip)
{
if (!attr)
return;
if (eth_s_id == NULL) {
memset(&(attr->evpn_overlay.eth_s_id), 0,
sizeof(struct eth_segment_id));
} else {
memcpy(&(attr->evpn_overlay.eth_s_id), eth_s_id,
sizeof(struct eth_segment_id));
}
if (gw_ip == NULL) {
memset(&(attr->evpn_overlay.gw_ip), 0, sizeof(union gw_addr));
} else {
memcpy(&(attr->evpn_overlay.gw_ip), gw_ip,
sizeof(union gw_addr));
}
}
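/* Compare a path's stored EVPN overlay index (Ethernet segment ID and
 * gateway IP) against the received values; NULL arguments are treated as
 * all-zero. Only meaningful for AFI_L2VPN.
 */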
static bool overlay_index_equal(afi_t afi, struct bgp_path_info *path,
struct eth_segment_id *eth_s_id,
union gw_addr *gw_ip)
{
struct eth_segment_id *path_eth_s_id, *path_eth_s_id_remote;
union gw_addr *path_gw_ip, *path_gw_ip_remote;
union {
struct eth_segment_id esi;
union gw_addr ip;
} temp;
if (afi != AFI_L2VPN)
return true;
if (!path->attr) {
memset(&temp, 0, sizeof(temp));
path_eth_s_id = &temp.esi;
path_gw_ip = &temp.ip;
if (eth_s_id == NULL && gw_ip == NULL)
return true;
} else {
path_eth_s_id = &(path->attr->evpn_overlay.eth_s_id);
path_gw_ip = &(path->attr->evpn_overlay.gw_ip);
}
if (gw_ip == NULL) {
memset(&temp, 0, sizeof(temp));
path_gw_ip_remote = &temp.ip;
} else
path_gw_ip_remote = gw_ip;
if (eth_s_id == NULL) {
memset(&temp, 0, sizeof(temp));
path_eth_s_id_remote = &temp.esi;
} else
path_eth_s_id_remote = eth_s_id;
	/* Both components must match for the overlay index to be equal. */
	if (memcmp(path_gw_ip, path_gw_ip_remote, sizeof(union gw_addr)) != 0)
		return false;
return !memcmp(path_eth_s_id, path_eth_s_id_remote,
sizeof(struct eth_segment_id));
}
/* Check whether the received nexthop is valid. */
static int bgp_update_martian_nexthop(struct bgp *bgp, afi_t afi, safi_t safi,
struct attr *attr)
{
int ret = 0;
	/* Only validated for unicast and multicast currently; also valid
	 * for EVPN, where the nexthop is an IP address. */
if (safi != SAFI_UNICAST && safi != SAFI_MULTICAST && safi != SAFI_EVPN)
return 0;
/* If NEXT_HOP is present, validate it. */
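	/* An IPv4 nexthop is martian if it is 0.0.0.0, a class D/E
	 * (multicast or reserved) address, or one of our own addresses. */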
if (attr->flag & ATTR_FLAG_BIT(BGP_ATTR_NEXT_HOP)) {
if (attr->nexthop.s_addr == 0
|| IPV4_CLASS_DE(ntohl(attr->nexthop.s_addr))
|| bgp_nexthop_self(bgp, attr->nexthop))
return 1;
}
/* If MP_NEXTHOP is present, validate it. */
/* Note: For IPv6 nexthops, we only validate the global (1st) nexthop;
* there is code in bgp_attr.c to ignore the link-local (2nd) nexthop if
* it is not an IPv6 link-local address.
*/
if (attr->mp_nexthop_len) {
switch (attr->mp_nexthop_len) {
case BGP_ATTR_NHLEN_IPV4:
case BGP_ATTR_NHLEN_VPNV4:
ret = (attr->mp_nexthop_global_in.s_addr == 0
|| IPV4_CLASS_DE(ntohl(
attr->mp_nexthop_global_in.s_addr))
|| bgp_nexthop_self(bgp,
attr->mp_nexthop_global_in));
break;
case BGP_ATTR_NHLEN_IPV6_GLOBAL:
case BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL:
case BGP_ATTR_NHLEN_VPNV6_GLOBAL:
ret = (IN6_IS_ADDR_UNSPECIFIED(&attr->mp_nexthop_global)
|| IN6_IS_ADDR_LOOPBACK(&attr->mp_nexthop_global)
|| IN6_IS_ADDR_MULTICAST(
&attr->mp_nexthop_global));
break;
default:
ret = 1;
break;
}
}
return ret;
}
int bgp_update(struct peer *peer, struct prefix *p, uint32_t addpath_id,
struct attr *attr, afi_t afi, safi_t safi, int type,
int sub_type, struct prefix_rd *prd, mpls_label_t *label,
uint32_t num_labels, int soft_reconfig,
struct bgp_route_evpn *evpn)
{
int ret;
int aspath_loop_count = 0;
struct bgp_node *rn;
struct bgp *bgp;
struct attr new_attr;
struct attr *attr_new;
struct bgp_path_info *pi;
struct bgp_path_info *new;
struct bgp_path_info_extra *extra;
const char *reason;
char pfx_buf[BGP_PRD_PATH_STRLEN];
int connected = 0;
int do_loop_check = 1;
int has_valid_label = 0;
#if ENABLE_BGP_VNC
int vnc_implicit_withdraw = 0;
#endif
int same_attr = 0;
memset(&new_attr, 0, sizeof(struct attr));
new_attr.label_index = BGP_INVALID_LABEL_INDEX;
new_attr.label = MPLS_INVALID_LABEL;
bgp = peer->bgp;
rn = bgp_afi_node_get(bgp->rib[afi][safi], afi, safi, p, prd);
/* TODO: Check to see if we can get rid of "is_valid_label" */
if (afi == AFI_L2VPN && safi == SAFI_EVPN)
has_valid_label = (num_labels > 0) ? 1 : 0;
else
has_valid_label = bgp_is_valid_label(label);
	/* When the peer has soft reconfiguration enabled, record the input
	   packet in Adj-RIBs-In. */
if (!soft_reconfig
&& CHECK_FLAG(peer->af_flags[afi][safi], PEER_FLAG_SOFT_RECONFIG)
&& peer != bgp->peer_self)
bgp_adj_in_set(rn, peer, attr, addpath_id);
/* Check previously received route. */
for (pi = bgp_node_get_bgp_path_info(rn); pi; pi = pi->next)
if (pi->peer == peer && pi->type == type
&& pi->sub_type == sub_type
&& pi->addpath_rx_id == addpath_id)
break;
/* AS path local-as loop check. */
if (peer->change_local_as) {
if (peer->allowas_in[afi][safi])
aspath_loop_count = peer->allowas_in[afi][safi];
else if (!CHECK_FLAG(peer->flags,
PEER_FLAG_LOCAL_AS_NO_PREPEND))
aspath_loop_count = 1;
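		/* i.e., tolerate the single occurrence of our local-as that
		 * "neighbor ... local-as" itself introduces when no-prepend
		 * is not set. */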
if (aspath_loop_check(attr->aspath, peer->change_local_as)
> aspath_loop_count) {
reason = "as-path contains our own AS;";
goto filtered;
}
}
	/* If the peer is configured for "allowas-in origin" and the last
	 * ASN in the as-path is our own, we do not need to call
	 * aspath_loop_check().
	 */
if (CHECK_FLAG(peer->af_flags[afi][safi], PEER_FLAG_ALLOWAS_IN_ORIGIN))
if (aspath_get_last_as(attr->aspath) == bgp->as)
do_loop_check = 0;
/* AS path loop check. */
if (do_loop_check) {
if (aspath_loop_check(attr->aspath, bgp->as)
> peer->allowas_in[afi][safi]
|| (CHECK_FLAG(bgp->config, BGP_CONFIG_CONFEDERATION)
&& aspath_loop_check(attr->aspath, bgp->confed_id)
> peer->allowas_in[afi][safi])) {
reason = "as-path contains our own AS;";
goto filtered;
}
}
/* Route reflector originator ID check. */
if (attr->flag & ATTR_FLAG_BIT(BGP_ATTR_ORIGINATOR_ID)
&& IPV4_ADDR_SAME(&bgp->router_id, &attr->originator_id)) {
reason = "originator is us;";
goto filtered;
}
/* Route reflector cluster ID check. */
if (bgp_cluster_filter(peer, attr)) {
reason = "reflected from the same cluster;";
goto filtered;
}
/* Apply incoming filter. */
if (bgp_input_filter(peer, p, attr, afi, safi) == FILTER_DENY) {
reason = "filter;";
goto filtered;
}
bgp_attr_dup(&new_attr, attr);
	/* Apply incoming route-map.
	 * NB: new_attr may now contain newly allocated values from the
	 * route-map "set" commands, so we need bgp_attr_flush() in the error
	 * paths until we intern the attr (which takes over the memory
	 * references).
	 */
if (bgp_input_modifier(peer, p, &new_attr, afi, safi, NULL)
== RMAP_DENY) {
reason = "route-map;";
bgp_attr_flush(&new_attr);
goto filtered;
}
if (peer->sort == BGP_PEER_EBGP) {
/* If we receive the graceful-shutdown community from an eBGP
* peer we must lower local-preference */
if (new_attr.community
&& community_include(new_attr.community, COMMUNITY_GSHUT)) {
new_attr.flag |= ATTR_FLAG_BIT(BGP_ATTR_LOCAL_PREF);
new_attr.local_pref = BGP_GSHUT_LOCAL_PREF;
/* If graceful-shutdown is configured then add the GSHUT
* community to all paths received from eBGP peers */
} else if (bgp_flag_check(peer->bgp,
BGP_FLAG_GRACEFUL_SHUTDOWN)) {
bgp_attr_add_gshut_community(&new_attr);
}
}
/* next hop check. */
if (!CHECK_FLAG(peer->flags, PEER_FLAG_IS_RFAPI_HD)
&& bgp_update_martian_nexthop(bgp, afi, safi, &new_attr)) {
reason = "martian or self next-hop;";
bgp_attr_flush(&new_attr);
goto filtered;
}
if (bgp_mac_entry_exists(p) || bgp_mac_exist(&attr->rmac)) {
reason = "self mac;";
goto filtered;
}
attr_new = bgp_attr_intern(&new_attr);
/* If the update is implicit withdraw. */
if (pi) {
pi->uptime = bgp_clock();
same_attr = attrhash_cmp(pi->attr, attr_new);
/* Same attribute comes in. */
if (!CHECK_FLAG(pi->flags, BGP_PATH_REMOVED)
&& attrhash_cmp(pi->attr, attr_new)
&& (!has_valid_label
|| memcmp(&(bgp_path_info_extra_get(pi))->label, label,
num_labels * sizeof(mpls_label_t))
== 0)
&& (overlay_index_equal(
afi, pi, evpn == NULL ? NULL : &evpn->eth_s_id,
evpn == NULL ? NULL : &evpn->gw_ip))) {
if (CHECK_FLAG(bgp->af_flags[afi][safi],
BGP_CONFIG_DAMPENING)
&& peer->sort == BGP_PEER_EBGP
&& CHECK_FLAG(pi->flags, BGP_PATH_HISTORY)) {
if (bgp_debug_update(peer, p, NULL, 1)) {
bgp_debug_rdpfxpath2str(
afi, safi, prd, p, label,
num_labels, addpath_id ? 1 : 0,
addpath_id, pfx_buf,
sizeof(pfx_buf));
zlog_debug("%s rcvd %s", peer->host,
pfx_buf);
}
if (bgp_damp_update(pi, rn, afi, safi)
!= BGP_DAMP_SUPPRESSED) {
bgp_aggregate_increment(bgp, p, pi, afi,
safi);
bgp_process(bgp, rn, afi, safi);
}
} else /* Duplicate - odd */
{
if (bgp_debug_update(peer, p, NULL, 1)) {
if (!peer->rcvd_attr_printed) {
zlog_debug(
"%s rcvd UPDATE w/ attr: %s",
peer->host,
peer->rcvd_attr_str);
peer->rcvd_attr_printed = 1;
}
bgp_debug_rdpfxpath2str(
afi, safi, prd, p, label,
num_labels, addpath_id ? 1 : 0,
addpath_id, pfx_buf,
sizeof(pfx_buf));
zlog_debug(
"%s rcvd %s...duplicate ignored",
peer->host, pfx_buf);
}
/* graceful restart STALE flag unset. */
if (CHECK_FLAG(pi->flags, BGP_PATH_STALE)) {
bgp_path_info_unset_flag(
rn, pi, BGP_PATH_STALE);
bgp_process(bgp, rn, afi, safi);
}
}
bgp_unlock_node(rn);
bgp_attr_unintern(&attr_new);
return 0;
}
		/* The route was re-announced before we fully processed the
		 * earlier withdraw. */
if (CHECK_FLAG(pi->flags, BGP_PATH_REMOVED)) {
if (bgp_debug_update(peer, p, NULL, 1)) {
bgp_debug_rdpfxpath2str(
afi, safi, prd, p, label, num_labels,
addpath_id ? 1 : 0, addpath_id, pfx_buf,
sizeof(pfx_buf));
zlog_debug(
"%s rcvd %s, flapped quicker than processing",
peer->host, pfx_buf);
}
bgp_path_info_restore(rn, pi);
}
/* Received Logging. */
if (bgp_debug_update(peer, p, NULL, 1)) {
bgp_debug_rdpfxpath2str(afi, safi, prd, p, label,
num_labels, addpath_id ? 1 : 0,
addpath_id, pfx_buf,
sizeof(pfx_buf));
zlog_debug("%s rcvd %s", peer->host, pfx_buf);
}
/* graceful restart STALE flag unset. */
if (CHECK_FLAG(pi->flags, BGP_PATH_STALE))
bgp_path_info_unset_flag(rn, pi, BGP_PATH_STALE);
/* The attribute is changed. */
bgp_path_info_set_flag(rn, pi, BGP_PATH_ATTR_CHANGED);
		/* Implicit withdraw: decrement the aggregate and prefix
		 * count here; they are incremented below only if the update
		 * is accepted.
		 */
bgp_aggregate_decrement(bgp, p, pi, afi, safi);
/* Update bgp route dampening information. */
if (CHECK_FLAG(bgp->af_flags[afi][safi], BGP_CONFIG_DAMPENING)
&& peer->sort == BGP_PEER_EBGP) {
			/* This is an implicit withdraw, so update the
			 * dampening information. */
if (!CHECK_FLAG(pi->flags, BGP_PATH_HISTORY))
bgp_damp_withdraw(pi, rn, afi, safi, 1);
}
#if ENABLE_BGP_VNC
if (safi == SAFI_MPLS_VPN) {
struct bgp_node *prn = NULL;
struct bgp_table *table = NULL;
prn = bgp_node_get(bgp->rib[afi][safi],
(struct prefix *)prd);
if (bgp_node_has_bgp_path_info_data(prn)) {
table = bgp_node_get_bgp_table_info(prn);
vnc_import_bgp_del_vnc_host_route_mode_resolve_nve(
bgp, prd, table, p, pi);
}
bgp_unlock_node(prn);
}
if ((afi == AFI_IP || afi == AFI_IP6)
&& (safi == SAFI_UNICAST)) {
if (CHECK_FLAG(pi->flags, BGP_PATH_SELECTED)) {
/*
* Implicit withdraw case.
*/
++vnc_implicit_withdraw;
vnc_import_bgp_del_route(bgp, p, pi);
vnc_import_bgp_exterior_del_route(bgp, p, pi);
}
}
#endif
		/* Special handling for an EVPN update of an existing route:
		 * if the extended-community attribute has changed, we need
		 * to un-import the route using its existing extended
		 * community. It will subsequently be processed for import
		 * with the new extended community.
		 */
if (safi == SAFI_EVPN && !same_attr) {
if ((pi->attr->flag
& ATTR_FLAG_BIT(BGP_ATTR_EXT_COMMUNITIES))
&& (attr_new->flag
& ATTR_FLAG_BIT(BGP_ATTR_EXT_COMMUNITIES))) {
int cmp;
cmp = ecommunity_cmp(pi->attr->ecommunity,
attr_new->ecommunity);
if (!cmp) {
if (bgp_debug_update(peer, p, NULL, 1))
zlog_debug(
"Change in EXT-COMM, existing %s new %s",
ecommunity_str(
pi->attr->ecommunity),
ecommunity_str(
attr_new->ecommunity));
bgp_evpn_unimport_route(bgp, afi, safi,
p, pi);
}
}
}
/* Update to new attribute. */
bgp_attr_unintern(&pi->attr);
pi->attr = attr_new;
/* Update MPLS label */
if (has_valid_label) {
extra = bgp_path_info_extra_get(pi);
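			/* Note: this is a pointer comparison; the copy is
			 * skipped only when the caller passed this path's own
			 * label storage (as bgp_soft_reconfig_table() does).
			 */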
if (extra->label != label) {
memcpy(&extra->label, label,
num_labels * sizeof(mpls_label_t));
extra->num_labels = num_labels;
}
if (!(afi == AFI_L2VPN && safi == SAFI_EVPN))
bgp_set_valid_label(&extra->label[0]);
}
#if ENABLE_BGP_VNC
if ((afi == AFI_IP || afi == AFI_IP6)
&& (safi == SAFI_UNICAST)) {
if (vnc_implicit_withdraw) {
				/*
				 * Add back the route with its new attributes
				 * (e.g., nexthop). The route is still
				 * selected until the route selection queued
				 * by bgp_process() actually runs. We have to
				 * make this update to the VNC side
				 * immediately to avoid racing against
				 * configuration changes (e.g., route-map
				 * changes) which trigger re-importation of
				 * the entire RIB.
				 */
vnc_import_bgp_add_route(bgp, p, pi);
vnc_import_bgp_exterior_add_route(bgp, p, pi);
}
}
#endif
/* Update Overlay Index */
if (afi == AFI_L2VPN) {
overlay_index_update(
pi->attr, evpn == NULL ? NULL : &evpn->eth_s_id,
evpn == NULL ? NULL : &evpn->gw_ip);
}
/* Update bgp route dampening information. */
if (CHECK_FLAG(bgp->af_flags[afi][safi], BGP_CONFIG_DAMPENING)
&& peer->sort == BGP_PEER_EBGP) {
/* Now we do normal update dampening. */
ret = bgp_damp_update(pi, rn, afi, safi);
if (ret == BGP_DAMP_SUPPRESSED) {
bgp_unlock_node(rn);
return 0;
}
}
		/* Nexthop reachability check - for unicast and
		 * labeled-unicast. */
if ((afi == AFI_IP || afi == AFI_IP6)
&& (safi == SAFI_UNICAST || safi == SAFI_LABELED_UNICAST)) {
if (peer->sort == BGP_PEER_EBGP && peer->ttl == 1
&& !CHECK_FLAG(peer->flags,
PEER_FLAG_DISABLE_CONNECTED_CHECK)
&& !bgp_flag_check(
bgp, BGP_FLAG_DISABLE_NH_CONNECTED_CHK))
connected = 1;
else
connected = 0;
struct bgp *bgp_nexthop = bgp;
if (pi->extra && pi->extra->bgp_orig)
bgp_nexthop = pi->extra->bgp_orig;
if (bgp_find_or_add_nexthop(bgp, bgp_nexthop, afi, pi,
NULL, connected)
|| CHECK_FLAG(peer->flags, PEER_FLAG_IS_RFAPI_HD))
bgp_path_info_set_flag(rn, pi, BGP_PATH_VALID);
else {
if (BGP_DEBUG(nht, NHT)) {
char buf1[INET6_ADDRSTRLEN];
inet_ntop(AF_INET,
(const void *)&attr_new
->nexthop,
buf1, INET6_ADDRSTRLEN);
zlog_debug("%s(%s): NH unresolved",
__FUNCTION__, buf1);
}
bgp_path_info_unset_flag(rn, pi,
BGP_PATH_VALID);
}
} else
bgp_path_info_set_flag(rn, pi, BGP_PATH_VALID);
#if ENABLE_BGP_VNC
if (safi == SAFI_MPLS_VPN) {
struct bgp_node *prn = NULL;
struct bgp_table *table = NULL;
prn = bgp_node_get(bgp->rib[afi][safi],
(struct prefix *)prd);
if (bgp_node_has_bgp_path_info_data(prn)) {
table = bgp_node_get_bgp_table_info(prn);
vnc_import_bgp_add_vnc_host_route_mode_resolve_nve(
bgp, prd, table, p, pi);
}
bgp_unlock_node(prn);
}
#endif
		/* If this is an EVPN route and some attribute has changed,
		 * process the route for import. If the extended community
		 * has changed, we would have done the un-import earlier and
		 * the import now results in the route getting injected into
		 * the appropriate L2 VNIs. If it is just some other attribute
		 * change, the import results in updating the attributes for
		 * the route in the VNI(s).
		 */
if (safi == SAFI_EVPN && !same_attr)
bgp_evpn_import_route(bgp, afi, safi, p, pi);
/* Process change. */
bgp_aggregate_increment(bgp, p, pi, afi, safi);
bgp_process(bgp, rn, afi, safi);
bgp_unlock_node(rn);
if (SAFI_UNICAST == safi
&& (bgp->inst_type == BGP_INSTANCE_TYPE_VRF
|| bgp->inst_type == BGP_INSTANCE_TYPE_DEFAULT)) {
vpn_leak_from_vrf_update(bgp_get_default(), bgp, pi);
}
if ((SAFI_MPLS_VPN == safi)
&& (bgp->inst_type == BGP_INSTANCE_TYPE_DEFAULT)) {
vpn_leak_to_vrf_update(bgp, pi);
}
#if ENABLE_BGP_VNC
if (SAFI_MPLS_VPN == safi) {
mpls_label_t label_decoded = decode_label(label);
rfapiProcessUpdate(peer, NULL, p, prd, attr, afi, safi,
type, sub_type, &label_decoded);
}
if (SAFI_ENCAP == safi) {
rfapiProcessUpdate(peer, NULL, p, prd, attr, afi, safi,
type, sub_type, NULL);
}
#endif
return 0;
	} /* End of implicit withdraw */
/* Received Logging. */
if (bgp_debug_update(peer, p, NULL, 1)) {
if (!peer->rcvd_attr_printed) {
zlog_debug("%s rcvd UPDATE w/ attr: %s", peer->host,
peer->rcvd_attr_str);
peer->rcvd_attr_printed = 1;
}
bgp_debug_rdpfxpath2str(afi, safi, prd, p, label, num_labels,
addpath_id ? 1 : 0, addpath_id, pfx_buf,
sizeof(pfx_buf));
zlog_debug("%s rcvd %s", peer->host, pfx_buf);
}
/* Make new BGP info. */
new = info_make(type, sub_type, 0, peer, attr_new, rn);
/* Update MPLS label */
if (has_valid_label) {
extra = bgp_path_info_extra_get(new);
if (extra->label != label) {
memcpy(&extra->label, label,
num_labels * sizeof(mpls_label_t));
extra->num_labels = num_labels;
}
if (!(afi == AFI_L2VPN && safi == SAFI_EVPN))
bgp_set_valid_label(&extra->label[0]);
}
/* Update Overlay Index */
if (afi == AFI_L2VPN) {
overlay_index_update(new->attr,
evpn == NULL ? NULL : &evpn->eth_s_id,
evpn == NULL ? NULL : &evpn->gw_ip);
}
/* Nexthop reachability check. */
if ((afi == AFI_IP || afi == AFI_IP6)
&& (safi == SAFI_UNICAST || safi == SAFI_LABELED_UNICAST)) {
if (peer->sort == BGP_PEER_EBGP && peer->ttl == 1
&& !CHECK_FLAG(peer->flags,
PEER_FLAG_DISABLE_CONNECTED_CHECK)
&& !bgp_flag_check(bgp, BGP_FLAG_DISABLE_NH_CONNECTED_CHK))
connected = 1;
else
connected = 0;
if (bgp_find_or_add_nexthop(bgp, bgp, afi, new, NULL, connected)
|| CHECK_FLAG(peer->flags, PEER_FLAG_IS_RFAPI_HD))
bgp_path_info_set_flag(rn, new, BGP_PATH_VALID);
else {
if (BGP_DEBUG(nht, NHT)) {
char buf1[INET6_ADDRSTRLEN];
inet_ntop(AF_INET,
(const void *)&attr_new->nexthop,
buf1, INET6_ADDRSTRLEN);
zlog_debug("%s(%s): NH unresolved",
__FUNCTION__, buf1);
}
bgp_path_info_unset_flag(rn, new, BGP_PATH_VALID);
}
} else
bgp_path_info_set_flag(rn, new, BGP_PATH_VALID);
/* Addpath ID */
new->addpath_rx_id = addpath_id;
/* Increment prefix */
bgp_aggregate_increment(bgp, p, new, afi, safi);
/* Register new BGP information. */
bgp_path_info_add(rn, new);
/* route_node_get lock */
bgp_unlock_node(rn);
#if ENABLE_BGP_VNC
if (safi == SAFI_MPLS_VPN) {
struct bgp_node *prn = NULL;
struct bgp_table *table = NULL;
prn = bgp_node_get(bgp->rib[afi][safi], (struct prefix *)prd);
if (bgp_node_has_bgp_path_info_data(prn)) {
table = bgp_node_get_bgp_table_info(prn);
vnc_import_bgp_add_vnc_host_route_mode_resolve_nve(
bgp, prd, table, p, new);
}
bgp_unlock_node(prn);
}
#endif
	/* If a maximum-prefix limit is configured and the current prefix
	   count exceeds it, take the session down. */
if (bgp_maximum_prefix_overflow(peer, afi, safi, 0))
return -1;
/* If this is an EVPN route, process for import. */
if (safi == SAFI_EVPN)
bgp_evpn_import_route(bgp, afi, safi, p, new);
/* Process change. */
bgp_process(bgp, rn, afi, safi);
if (SAFI_UNICAST == safi
&& (bgp->inst_type == BGP_INSTANCE_TYPE_VRF
|| bgp->inst_type == BGP_INSTANCE_TYPE_DEFAULT)) {
vpn_leak_from_vrf_update(bgp_get_default(), bgp, new);
}
if ((SAFI_MPLS_VPN == safi)
&& (bgp->inst_type == BGP_INSTANCE_TYPE_DEFAULT)) {
vpn_leak_to_vrf_update(bgp, new);
}
#if ENABLE_BGP_VNC
if (SAFI_MPLS_VPN == safi) {
mpls_label_t label_decoded = decode_label(label);
rfapiProcessUpdate(peer, NULL, p, prd, attr, afi, safi, type,
sub_type, &label_decoded);
}
if (SAFI_ENCAP == safi) {
rfapiProcessUpdate(peer, NULL, p, prd, attr, afi, safi, type,
sub_type, NULL);
}
#endif
return 0;
	/* This BGP update is filtered. Log the reason, then update the BGP
	   entry. */
filtered:
if (bgp_debug_update(peer, p, NULL, 1)) {
if (!peer->rcvd_attr_printed) {
zlog_debug("%s rcvd UPDATE w/ attr: %s", peer->host,
peer->rcvd_attr_str);
peer->rcvd_attr_printed = 1;
}
bgp_debug_rdpfxpath2str(afi, safi, prd, p, label, num_labels,
addpath_id ? 1 : 0, addpath_id, pfx_buf,
sizeof(pfx_buf));
zlog_debug("%s rcvd UPDATE about %s -- DENIED due to: %s",
peer->host, pfx_buf, reason);
}
if (pi) {
/* If this is an EVPN route, un-import it as it is now filtered.
*/
if (safi == SAFI_EVPN)
bgp_evpn_unimport_route(bgp, afi, safi, p, pi);
if (SAFI_UNICAST == safi
&& (bgp->inst_type == BGP_INSTANCE_TYPE_VRF
|| bgp->inst_type == BGP_INSTANCE_TYPE_DEFAULT)) {
vpn_leak_from_vrf_withdraw(bgp_get_default(), bgp, pi);
}
if ((SAFI_MPLS_VPN == safi)
&& (bgp->inst_type == BGP_INSTANCE_TYPE_DEFAULT)) {
vpn_leak_to_vrf_withdraw(bgp, pi);
}
bgp_rib_remove(rn, pi, peer, afi, safi);
}
bgp_unlock_node(rn);
#if ENABLE_BGP_VNC
	/*
	 * Filtered update is treated as an implicit withdrawal (see
	 * bgp_rib_remove() a few lines above).
	 */
if ((SAFI_MPLS_VPN == safi) || (SAFI_ENCAP == safi)) {
rfapiProcessWithdraw(peer, NULL, p, prd, NULL, afi, safi, type,
0);
}
#endif
return 0;
}
int bgp_withdraw(struct peer *peer, struct prefix *p, uint32_t addpath_id,
struct attr *attr, afi_t afi, safi_t safi, int type,
int sub_type, struct prefix_rd *prd, mpls_label_t *label,
uint32_t num_labels, struct bgp_route_evpn *evpn)
{
struct bgp *bgp;
char pfx_buf[BGP_PRD_PATH_STRLEN];
struct bgp_node *rn;
struct bgp_path_info *pi;
#if ENABLE_BGP_VNC
if ((SAFI_MPLS_VPN == safi) || (SAFI_ENCAP == safi)) {
rfapiProcessWithdraw(peer, NULL, p, prd, NULL, afi, safi, type,
0);
}
#endif
bgp = peer->bgp;
/* Lookup node. */
rn = bgp_afi_node_get(bgp->rib[afi][safi], afi, safi, p, prd);
	/* If the peer has soft reconfiguration enabled, record the input
	 * packet for further calculation.
	 *
	 * Cisco IOS 12.4(24)T4 on session establishment sends withdraws for
	 * all routes that are filtered. This tanks out Quagga RS pretty badly
	 * due to the iteration over all RS clients. Since we need to remove
	 * the entry from adj_in anyway, do that first; if there was no entry,
	 * we don't need to do anything more.
	 */
if (CHECK_FLAG(peer->af_flags[afi][safi], PEER_FLAG_SOFT_RECONFIG)
&& peer != bgp->peer_self)
if (!bgp_adj_in_unset(rn, peer, addpath_id)) {
if (bgp_debug_update(peer, p, NULL, 1)) {
bgp_debug_rdpfxpath2str(
afi, safi, prd, p, label, num_labels,
addpath_id ? 1 : 0, addpath_id, pfx_buf,
sizeof(pfx_buf));
zlog_debug(
"%s withdrawing route %s not in adj-in",
peer->host, pfx_buf);
}
bgp_unlock_node(rn);
return 0;
}
/* Lookup withdrawn route. */
for (pi = bgp_node_get_bgp_path_info(rn); pi; pi = pi->next)
if (pi->peer == peer && pi->type == type
&& pi->sub_type == sub_type
&& pi->addpath_rx_id == addpath_id)
break;
/* Logging. */
if (bgp_debug_update(peer, p, NULL, 1)) {
bgp_debug_rdpfxpath2str(afi, safi, prd, p, label, num_labels,
addpath_id ? 1 : 0, addpath_id, pfx_buf,
sizeof(pfx_buf));
zlog_debug("%s rcvd UPDATE about %s -- withdrawn", peer->host,
pfx_buf);
}
/* Withdraw specified route from routing table. */
if (pi && !CHECK_FLAG(pi->flags, BGP_PATH_HISTORY)) {
bgp_rib_withdraw(rn, pi, peer, afi, safi, prd);
if (SAFI_UNICAST == safi
&& (bgp->inst_type == BGP_INSTANCE_TYPE_VRF
|| bgp->inst_type == BGP_INSTANCE_TYPE_DEFAULT)) {
vpn_leak_from_vrf_withdraw(bgp_get_default(), bgp, pi);
}
if ((SAFI_MPLS_VPN == safi)
&& (bgp->inst_type == BGP_INSTANCE_TYPE_DEFAULT)) {
vpn_leak_to_vrf_withdraw(bgp, pi);
}
} else if (bgp_debug_update(peer, p, NULL, 1)) {
bgp_debug_rdpfxpath2str(afi, safi, prd, p, label, num_labels,
addpath_id ? 1 : 0, addpath_id, pfx_buf,
sizeof(pfx_buf));
zlog_debug("%s Can't find the route %s", peer->host, pfx_buf);
}
/* Unlock bgp_node_get() lock. */
bgp_unlock_node(rn);
return 0;
}
void bgp_default_originate(struct peer *peer, afi_t afi, safi_t safi,
int withdraw)
{
struct update_subgroup *subgrp;
subgrp = peer_subgroup(peer, afi, safi);
subgroup_default_originate(subgrp, withdraw);
}
/*
* bgp_stop_announce_route_timer
*/
void bgp_stop_announce_route_timer(struct peer_af *paf)
{
if (!paf->t_announce_route)
return;
THREAD_TIMER_OFF(paf->t_announce_route);
}
/*
* bgp_announce_route_timer_expired
*
* Callback that is invoked when the route announcement timer for a
* peer_af expires.
*/
static int bgp_announce_route_timer_expired(struct thread *t)
{
struct peer_af *paf;
struct peer *peer;
paf = THREAD_ARG(t);
peer = paf->peer;
if (peer->status != Established)
return 0;
if (!peer->afc_nego[paf->afi][paf->safi])
return 0;
peer_af_announce_route(paf, 1);
return 0;
}
/*
* bgp_announce_route
*
* *Triggers* announcement of routes of a given AFI/SAFI to a peer.
*/
void bgp_announce_route(struct peer *peer, afi_t afi, safi_t safi)
{
struct peer_af *paf;
struct update_subgroup *subgrp;
paf = peer_af_find(peer, afi, safi);
if (!paf)
return;
subgrp = PAF_SUBGRP(paf);
/*
* Ignore if subgroup doesn't exist (implies AF is not negotiated)
* or a refresh has already been triggered.
*/
if (!subgrp || paf->t_announce_route)
return;
/*
* Start a timer to stagger/delay the announce. This serves
* two purposes - announcement can potentially be combined for
* multiple peers and the announcement doesn't happen in the
* vty context.
*/
thread_add_timer_msec(bm->master, bgp_announce_route_timer_expired, paf,
(subgrp->peer_count == 1)
? BGP_ANNOUNCE_ROUTE_SHORT_DELAY_MS
: BGP_ANNOUNCE_ROUTE_DELAY_MS,
&paf->t_announce_route);
}
/*
* Announce routes from all AF tables to a peer.
*
* This should ONLY be called when there is a need to refresh the
* routes to the peer based on a policy change for this peer alone
* or a route refresh request received from the peer.
* The operation will result in splitting the peer from its existing
* subgroups and putting it in new subgroups.
*/
void bgp_announce_route_all(struct peer *peer)
{
afi_t afi;
safi_t safi;
FOREACH_AFI_SAFI (afi, safi)
bgp_announce_route(peer, afi, safi);
}
static void bgp_soft_reconfig_table(struct peer *peer, afi_t afi, safi_t safi,
struct bgp_table *table,
struct prefix_rd *prd)
{
int ret;
struct bgp_node *rn;
struct bgp_adj_in *ain;
if (!table)
table = peer->bgp->rib[afi][safi];
for (rn = bgp_table_top(table); rn; rn = bgp_route_next(rn))
for (ain = rn->adj_in; ain; ain = ain->next) {
if (ain->peer != peer)
continue;
struct bgp_path_info *pi =
bgp_node_get_bgp_path_info(rn);
uint32_t num_labels = 0;
mpls_label_t *label_pnt = NULL;
struct bgp_route_evpn evpn;
if (pi && pi->extra)
num_labels = pi->extra->num_labels;
if (num_labels)
label_pnt = &pi->extra->label[0];
if (pi)
memcpy(&evpn, &pi->attr->evpn_overlay,
sizeof(evpn));
else
memset(&evpn, 0, sizeof(evpn));
ret = bgp_update(peer, &rn->p, ain->addpath_rx_id,
ain->attr, afi, safi, ZEBRA_ROUTE_BGP,
BGP_ROUTE_NORMAL, prd, label_pnt,
num_labels, 1, &evpn);
if (ret < 0) {
bgp_unlock_node(rn);
return;
}
}
}
void bgp_soft_reconfig_in(struct peer *peer, afi_t afi, safi_t safi)
{
struct bgp_node *rn;
struct bgp_table *table;
if (peer->status != Established)
return;
if ((safi != SAFI_MPLS_VPN) && (safi != SAFI_ENCAP)
&& (safi != SAFI_EVPN))
bgp_soft_reconfig_table(peer, afi, safi, NULL, NULL);
else
for (rn = bgp_table_top(peer->bgp->rib[afi][safi]); rn;
rn = bgp_route_next(rn)) {
table = bgp_node_get_bgp_table_info(rn);
if (table != NULL) {
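			/* The top-level table for VPN/ENCAP/EVPN is keyed by
			 * the 8-byte Route Distinguisher; recover it from the
			 * node's prefix so bgp_update() can locate the
			 * per-RD table. */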
struct prefix_rd prd;
prd.family = AF_UNSPEC;
prd.prefixlen = 64;
memcpy(&prd.val, rn->p.u.val, 8);
bgp_soft_reconfig_table(peer, afi, safi, table,
&prd);
}
}
}
struct bgp_clear_node_queue {
struct bgp_node *rn;
};
static wq_item_status bgp_clear_route_node(struct work_queue *wq, void *data)
{
struct bgp_clear_node_queue *cnq = data;
struct bgp_node *rn = cnq->rn;
struct peer *peer = wq->spec.data;
struct bgp_path_info *pi;
struct bgp *bgp;
afi_t afi = bgp_node_table(rn)->afi;
safi_t safi = bgp_node_table(rn)->safi;
assert(rn && peer);
bgp = peer->bgp;
/* It is possible that we have multiple paths for a prefix from a peer
* if that peer is using AddPath.
*/
for (pi = bgp_node_get_bgp_path_info(rn); pi; pi = pi->next) {
if (pi->peer != peer)
continue;
/* graceful restart STALE flag set. */
if (CHECK_FLAG(peer->sflags, PEER_STATUS_NSF_WAIT)
&& peer->nsf[afi][safi]
&& !CHECK_FLAG(pi->flags, BGP_PATH_STALE)
&& !CHECK_FLAG(pi->flags, BGP_PATH_UNUSEABLE))
bgp_path_info_set_flag(rn, pi, BGP_PATH_STALE);
else {
/* If this is an EVPN route, process for
* un-import. */
if (safi == SAFI_EVPN)
bgp_evpn_unimport_route(bgp, afi, safi, &rn->p,
pi);
/* Handle withdraw for VRF route-leaking and L3VPN */
if (SAFI_UNICAST == safi
&& (bgp->inst_type == BGP_INSTANCE_TYPE_VRF ||
bgp->inst_type == BGP_INSTANCE_TYPE_DEFAULT)) {
vpn_leak_from_vrf_withdraw(bgp_get_default(),
bgp, pi);
}
if (SAFI_MPLS_VPN == safi &&
bgp->inst_type == BGP_INSTANCE_TYPE_DEFAULT) {
vpn_leak_to_vrf_withdraw(bgp, pi);
}
bgp_rib_remove(rn, pi, peer, afi, safi);
}
}
return WQ_SUCCESS;
}
static void bgp_clear_node_queue_del(struct work_queue *wq, void *data)
{
struct bgp_clear_node_queue *cnq = data;
struct bgp_node *rn = cnq->rn;
struct bgp_table *table = bgp_node_table(rn);
bgp_unlock_node(rn);
bgp_table_unlock(table);
XFREE(MTYPE_BGP_CLEAR_NODE_QUEUE, cnq);
}
static void bgp_clear_node_complete(struct work_queue *wq)
{
struct peer *peer = wq->spec.data;
/* Tickle FSM to start moving again */
BGP_EVENT_ADD(peer, Clearing_Completed);
peer_unlock(peer); /* bgp_clear_route */
}
static void bgp_clear_node_queue_init(struct peer *peer)
{
char wname[sizeof("clear xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx")];
snprintf(wname, sizeof(wname), "clear %s", peer->host);
peer->clear_node_queue = work_queue_new(bm->master, wname);
peer->clear_node_queue->spec.hold = 10;
peer->clear_node_queue->spec.workfunc = &bgp_clear_route_node;
peer->clear_node_queue->spec.del_item_data = &bgp_clear_node_queue_del;
peer->clear_node_queue->spec.completion_func = &bgp_clear_node_complete;
peer->clear_node_queue->spec.max_retries = 0;
/* we only 'lock' this peer reference when the queue is actually active
*/
peer->clear_node_queue->spec.data = peer;
}
static void bgp_clear_route_table(struct peer *peer, afi_t afi, safi_t safi,
struct bgp_table *table)
{
struct bgp_node *rn;
int force = bm->process_main_queue ? 0 : 1;
if (!table)
table = peer->bgp->rib[afi][safi];
	/* If there is still no table, the afi/safi simply isn't configured. */
if (!table)
return;
for (rn = bgp_table_top(table); rn; rn = bgp_route_next(rn)) {
struct bgp_path_info *pi, *next;
struct bgp_adj_in *ain;
struct bgp_adj_in *ain_next;
/* XXX:TODO: This is suboptimal, every non-empty route_node is
* queued for every clearing peer, regardless of whether it is
* relevant to the peer at hand.