Skip to content

Commit

Permalink
Add support for fdb_event MOVE and check fdb event oids (#420)
Browse files Browse the repository at this point in the history
* Add support for fdb_event MOVE and check fdb event oids

* bring back ntf_queue constructor

* Log also entire fdb_entry
  • Loading branch information
kcudnik authored and lguohan committed Feb 15, 2019
1 parent 2b91013 commit d685e35
Show file tree
Hide file tree
Showing 5 changed files with 229 additions and 5 deletions.
95 changes: 95 additions & 0 deletions meta/sai_meta.cpp
Expand Up @@ -6015,6 +6015,45 @@ void meta_sai_on_fdb_flush_event_consolidated(
}
}

void meta_fdb_event_snoop_oid(
_In_ sai_object_id_t oid)
{
SWSS_LOG_ENTER();

if (oid == SAI_NULL_OBJECT_ID)
return;

if (object_reference_exists(oid))
return;

sai_object_type_t ot = sai_object_type_query(oid);

if (ot == SAI_OBJECT_TYPE_NULL)
{
SWSS_LOG_ERROR("failed to get object type on fdb_event oid: 0x%lx", oid);
return;
}

sai_object_meta_key_t key = { .objecttype = ot, .objectkey = { .key = { .object_id = oid } } };

object_reference_insert(oid);

if (!object_exists(key))
create_object(key);

/*
* In normal operation orch agent should query or create all bridge, vlan
* and bridge port, so we should not get this message. Let's put it as
* warning for better visibility. Most likely if this happen there is a
* vendor bug in SAI and we should also see warnings or errors reported
* from syncd in logs.
*/

SWSS_LOG_WARN("fdb_entry oid (snoop): %s: %s",
sai_serialize_object_type(ot).c_str(),
sai_serialize_object_id(oid).c_str());
}

void meta_sai_on_fdb_event_single(
_In_ const sai_fdb_event_notification_data_t& data)
{
Expand All @@ -6024,6 +6063,35 @@ void meta_sai_on_fdb_event_single(

std::string key_fdb = sai_serialize_object_meta_key(meta_key_fdb);

/*
* Because we could receive fdb event's before orch agent will query or
* create bridge/vlan/bridge port we should snoop here new OIDs and put
* them in local DB.
*
* Unfortunately we don't have a way to check whether those OIDs are correct
* or whether there maybe some bug in vendor SAI and for example is sending
* invalid OIDs in those event's. Also sai_object_type_query can return
* valid object type for OID, but this does not guarantee that this OID is
* valid, for example one of existing bridge ports that orch agent didn't
* query yet.
*/

meta_fdb_event_snoop_oid(data.fdb_entry.bv_id);

for (uint32_t i = 0; i < data.attr_count; i++)
{
auto meta = sai_metadata_get_attr_metadata(SAI_OBJECT_TYPE_FDB_ENTRY, data.attr[i].id);

if (meta == NULL)
{
SWSS_LOG_ERROR("failed to get metadata for fdb_entry attr.id = %d", data.attr[i].id);
continue;
}

if (meta->attrvaluetype == SAI_ATTR_VALUE_TYPE_OBJECT_ID)
meta_fdb_event_snoop_oid(data.attr[i].value.oid);
}

switch (data.event_type)
{
case SAI_FDB_EVENT_LEARNED:
Expand Down Expand Up @@ -6096,6 +6164,33 @@ void meta_sai_on_fdb_event_single(

break;

case SAI_FDB_EVENT_MOVE:

if (!object_exists(key_fdb))
{
SWSS_LOG_WARN("object key %s doesn't exist but received FDB MOVE event", key_fdb.c_str());
break;
}

// on MOVE event, just update attributes on existing entry

for (uint32_t i = 0; i < data.attr_count; i++)
{
const sai_attribute_t& attr = data.attr[i];

sai_status_t status = meta_generic_validation_set(meta_key_fdb, &attr);

if (status != SAI_STATUS_SUCCESS)
{
SWSS_LOG_ERROR("object key %s FDB MOVE event, SET validateion failed on attr.id = %d", key_fdb.c_str(), attr.id);
continue;
}

meta_generic_validation_post_set(meta_key_fdb, &attr);
}

break;

default:

SWSS_LOG_ERROR("got FDB_ENTRY notification with unknown event_type %d, bug?", data.event_type);
Expand Down
47 changes: 45 additions & 2 deletions saiplayer/saiplayer.cpp
Expand Up @@ -1079,6 +1079,8 @@ void processBulk(
}
}

bool g_sleep = false;

int replay(int argc, char **argv)
{
//swss::Logger::getInstance().setMinPrio(swss::Logger::SWSS_DEBUG);
Expand Down Expand Up @@ -1246,17 +1248,51 @@ int replay(int argc, char **argv)
catch (const std::exception &e)
{
SWSS_LOG_NOTICE("line: %s", line.c_str());
SWSS_LOG_NOTICE("resp: %s", response.c_str());
SWSS_LOG_NOTICE("resp (expected): %s", response.c_str());
SWSS_LOG_NOTICE("got: %s", sai_serialize_status(status).c_str());

if (api == SAI_COMMON_API_GET && (status == SAI_STATUS_SUCCESS || status == SAI_STATUS_BUFFER_OVERFLOW))
{
// log each get parameter
for (uint32_t i = 0; i < attr_count; ++i)
{
auto meta = sai_metadata_get_attr_metadata(object_type, attr_list[i].id);

auto val = sai_serialize_attr_value(*meta, attr_list[i]);

SWSS_LOG_NOTICE(" - %s:%s", meta->attridname, val.c_str());
}
}

exit(EXIT_FAILURE);
}

if (api == SAI_COMMON_API_GET && (status == SAI_STATUS_SUCCESS || status == SAI_STATUS_BUFFER_OVERFLOW))
{
// log each get parameter
for (uint32_t i = 0; i < attr_count; ++i)
{
auto meta = sai_metadata_get_attr_metadata(object_type, attr_list[i].id);

auto val = sai_serialize_attr_value(*meta, attr_list[i]);

SWSS_LOG_NOTICE(" - %s:%s", meta->attridname, val.c_str());
}
}
}
}

infile.close();

SWSS_LOG_NOTICE("finished replaying %s with SUCCESS", filename);

if (g_sleep)
{
fprintf(stderr, "Reply SUCCESS, sleeping, watching for notifications\n");

sleep(-1);
}

return 0;
}

Expand All @@ -1273,6 +1309,8 @@ void printUsage()
std::cout << " Enable temporary view between init and apply" << std::endl << std::endl;
std::cout << " -i --inspectAsic:" << std::endl;
std::cout << " Inspect ASIC by ASIC DB" << std::endl << std::endl;
std::cout << " -s --sleep:" << std::endl;
std::cout << " Sleep after success reply, to notice any switch notifications" << std::endl << std::endl;
std::cout << " -h --help:" << std::endl;
std::cout << " Print out this message" << std::endl << std::endl;
}
Expand All @@ -1293,10 +1331,11 @@ int handleCmdLine(int argc, char **argv)
{ "skipNotifySyncd", no_argument, 0, 'C' },
{ "enableDebug", no_argument, 0, 'd' },
{ "inspectAsic", no_argument, 0, 'i' },
{ "sleep", no_argument, 0, 's' },
{ 0, 0, 0, 0 }
};

const char* const optstring = "hCdui";
const char* const optstring = "hCduis";

int c = getopt_long(argc, argv, optstring, long_options, 0);

Expand All @@ -1321,6 +1360,10 @@ int handleCmdLine(int argc, char **argv)
g_inspectAsic = true;
break;

case 's':
g_sleep = true;
break;

case 'h':
printUsage();
exit(EXIT_SUCCESS);
Expand Down
28 changes: 28 additions & 0 deletions syncd/syncd.cpp
Expand Up @@ -448,6 +448,34 @@ sai_object_id_t translate_rid_to_vid(
return vid;
}

/**
* @brief Check if RID exists on the ASIC DB.
*
* @param rid Real object id to check.
*
* @return True if exists or SAI_NULL_OBJECT_ID, otherwise false.
*/
bool check_rid_exists(
_In_ sai_object_id_t rid)
{
SWSS_LOG_ENTER();

if (rid == SAI_NULL_OBJECT_ID)
return true;

if (local_rid_to_vid.find(rid) != local_rid_to_vid.end())
return true;

std::string str_rid = sai_serialize_object_id(rid);

auto pvid = g_redisClient->hget(RIDTOVID, str_rid);

if (pvid != NULL)
return true;

return false;
}

void translate_list_rid_to_vid(
_In_ sai_object_list_t &element,
_In_ sai_object_id_t switch_id)
Expand Down
3 changes: 3 additions & 0 deletions syncd/syncd.h
Expand Up @@ -110,6 +110,9 @@ sai_object_id_t translate_rid_to_vid(
_In_ sai_object_id_t rid,
_In_ sai_object_id_t switch_vid);

bool check_rid_exists(
_In_ sai_object_id_t rid);

void translate_rid_to_vid_list(
_In_ sai_object_type_t object_type,
_In_ sai_object_id_t switch_vid,
Expand Down
61 changes: 58 additions & 3 deletions syncd/syncd_notifications.cpp
Expand Up @@ -240,18 +240,73 @@ void redisPutFdbEntryToAsicView(
g_redisClient->hset(key, strAttrId, strAttrValue);
}

void check_fdb_event_notification_data(
_In_ const sai_fdb_event_notification_data_t& data)
{
SWSS_LOG_ENTER();

/*
* Any new RID value spotted in fdb notification can happen for 2 reasons:
*
* - a bug is present on the vendor SAI, all RID's are already in local or
* REDIS ASIC DB but vendor SAI returned new or invalid RID
*
* - orch agent didn't query yet bridge ID/vlan ID and already
* started to receive fdb notifications in which case warn message
* could be ignored.
*
* If vendor SAI will return invalid RID, then this will later on lead to
* inconsistent DB state and possible failure on apply view after cold or
* warm boot.
*
* On switch init we do discover phase, and we should discover all objects
* so we should not get any of those messages if SAI is in consistent
* state.
*/

if (!check_rid_exists(data.fdb_entry.bv_id))
SWSS_LOG_ERROR("bv_id RID 0x%lx is not present on local ASIC DB: %s", data.fdb_entry.bv_id,
sai_serialize_fdb_entry(data.fdb_entry).c_str());

if (!check_rid_exists(data.fdb_entry.switch_id) || data.fdb_entry.switch_id == SAI_NULL_OBJECT_ID)
SWSS_LOG_ERROR("switch_id RID 0x%lx is not present on local ASIC DB: %s", data.fdb_entry.bv_id,
sai_serialize_fdb_entry(data.fdb_entry).c_str());

for (uint32_t i = 0; i < data.attr_count; i++)
{
const sai_attribute_t& attr = data.attr[i];

auto meta = sai_metadata_get_attr_metadata(SAI_OBJECT_TYPE_FDB_ENTRY, attr.id);

if (meta == NULL)
{
SWSS_LOG_ERROR("unable to get metadata for fdb_entry attr.id = %d", attr.id);
continue;
}

// skip non oid attributes
if (meta->attrvaluetype != SAI_ATTR_VALUE_TYPE_OBJECT_ID)
continue;

if (!check_rid_exists(attr.value.oid))
SWSS_LOG_WARN("RID 0x%lx on %s is not present on local ASIC DB", attr.value.oid, meta->attridname);
}
}

void process_on_fdb_event(
_In_ uint32_t count,
_In_ sai_fdb_event_notification_data_t *data)
{
SWSS_LOG_ENTER();

SWSS_LOG_DEBUG("fdb event count: %d", count);
SWSS_LOG_INFO("fdb event count: %u", count);

for (uint32_t i = 0; i < count; i++)
{
sai_fdb_event_notification_data_t *fdb = &data[i];

check_fdb_event_notification_data(*fdb);

SWSS_LOG_DEBUG("fdb %u: type: %d", i, fdb->event_type);

fdb->fdb_entry.switch_id = translate_rid_to_vid(fdb->fdb_entry.switch_id, SAI_NULL_OBJECT_ID);
Expand Down Expand Up @@ -536,7 +591,7 @@ bool ntf_queue_t::enqueue(

if (!(log_count % 1000))
{
SWSS_LOG_NOTICE("Too many messages in queue (%ld), dropped %ld FDB events!",
SWSS_LOG_NOTICE("Too many messages in queue (%zu), dropped %u FDB events!",
queueStats(), (log_count+1));
}

Expand Down Expand Up @@ -782,6 +837,6 @@ void check_notifications_pointers(
* Here we translated pointer, just log it.
*/

SWSS_LOG_INFO("%s: %lp (orch) => %lp (syncd)", meta->attridname, prev, attr.value.ptr);
SWSS_LOG_INFO("%s: 0x%lX (orch) => 0x%lX (syncd)", meta->attridname, (uint64_t)prev, (uint64_t)attr.value.ptr);
}
}

0 comments on commit d685e35

Please sign in to comment.