Skip to content

Commit

Permalink
contrail-collector crash immediately after provisioning
Browse files Browse the repository at this point in the history
    Root cause:
      Race condition:
      The state_machine_ object is
          (1) allocated by sandesh_connection, and
          (2) used by the generator.
      When the problem happens, the generator receives a Resource update
      message and enqueues the resource update to state_machine_; at the
      same time, it updates the stats immediately. This action sometimes
      tries to acquire a mutex, which can make the thread yield the CPU.
      We call this thread 1.
      Meanwhile, a connection close is triggered, which invokes the
      destructor. The destructor calls terminate, and all memory related
      to this connection is released. We call this thread 2.
      If thread 2 finishes before thread 1 resumes, thread 1 accesses
      freed memory and the process crashes.

    Solution:
      The state machine design has to account for this problem, so the
      state machine's destruction is separated into two steps:
         (1) call terminate to free the memory allocated by its sub-structures.
         (2) start a timer to free the state machine itself.
     Between step 1 and step 2, deleted_ is used to check whether the
     state machine can still be used.
     We add a Shutdown function to the stats structures to pass this
     state on to them.

Closes-Bug: 1755649

Change-Id: I599461f0a37adc21d2b68a5ca20d66ccaf4f6e51
  • Loading branch information
ZhiqiangCui committed Jun 12, 2018
1 parent 144c18a commit 8e9f02f
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 2 deletions.
2 changes: 2 additions & 0 deletions library/cpp/sandesh_state_machine.cc
Expand Up @@ -405,6 +405,8 @@ SandeshStateMachine::~SandeshStateMachine() {
deleted_ = true;

work_queue_.Shutdown();
event_stats_.Shutdown();
message_stats_.Shutdown();

assert(session() == NULL);

Expand Down
22 changes: 22 additions & 0 deletions library/cpp/sandesh_statistics.cc
Expand Up @@ -26,6 +26,9 @@ void SandeshMessageStatistics::Get(DetailStatsMap *m_detail_type_stats,

void SandeshMessageStatistics::Get(DetailStatsList *v_detail_type_stats,
SandeshMessageStats *detail_agg_stats) const {
if (deleted_) {
return;
}
BOOST_FOREACH(DetailStatsMap::const_iterator::value_type it,
detail_type_stats_map_) {
v_detail_type_stats->push_back(*it.second);
Expand Down Expand Up @@ -54,6 +57,9 @@ static void PopulateBasicTypeStats(const SandeshMessageTypeStats &detail_stats,

void SandeshMessageStatistics::Get(BasicStatsList *v_basic_type_stats,
SandeshMessageBasicStats *basic_agg_stats) const {
if (deleted_) {
return;
}
BOOST_FOREACH(DetailStatsMap::const_iterator::value_type it,
detail_type_stats_map_) {
const SandeshMessageTypeStats *detail_stats(it.second);
Expand Down Expand Up @@ -230,6 +236,9 @@ void SandeshMessageStatistics::UpdateInternal(const std::string &msg_name,
uint64_t bytes, bool is_tx, bool dropped,
SandeshTxDropReason::type send_dreason,
SandeshRxDropReason::type recv_dreason) {
if (deleted_) {
return;
}
// Update detail stats
DetailStatsMap::iterator it = detail_type_stats_map_.find(msg_name);
if (it == detail_type_stats_map_.end()) {
Expand Down Expand Up @@ -266,11 +275,17 @@ void SandeshMessageStatistics::UpdateInternal(const std::string &msg_name,
}
}

// Marks this statistics object as deleted by setting deleted_ to true.
// The Get() overloads and UpdateInternal() shown in this change check
// deleted_ first and return without touching any state once it is set.
// NOTE(review): deleted_ is declared as a plain bool; if Shutdown() can run
// concurrently with those readers on another thread, the flag itself is
// accessed without synchronization -- confirm whether that is acceptable.
void SandeshMessageStatistics::Shutdown() {
deleted_ = true;
}
//
// SandeshEventStatistics
//
void SandeshEventStatistics::Get(
std::vector<SandeshStateMachineEvStats> *ev_stats) const {
if (deleted_) {
return;
}
BOOST_FOREACH(EventStatsMap::const_iterator::value_type it,
event_stats_) {
ev_stats->push_back(*it.second);
Expand All @@ -280,6 +295,9 @@ void SandeshEventStatistics::Get(

void SandeshEventStatistics::Update(std::string &event_name, bool enqueue,
bool fail) {
if (deleted_) {
return;
}
EventStatsMap::iterator it = event_stats_.find(event_name);
if (it == event_stats_.end()) {
it = (event_stats_.insert(event_name, new SandeshStateMachineEvStats)).first;
Expand All @@ -304,3 +322,7 @@ void SandeshEventStatistics::Update(std::string &event_name, bool enqueue,
}
}
}

// Marks this event statistics object as deleted by setting deleted_ to true.
// Get() and Update() check deleted_ first and return immediately once it is
// set, so no further reads or writes of the stats happen after this call.
// NOTE(review): deleted_ is declared as a plain bool; if Shutdown() can run
// concurrently with Get()/Update() on another thread, the flag itself is
// accessed without synchronization -- confirm whether that is acceptable.
void SandeshEventStatistics::Shutdown() {
deleted_ = true;
}
9 changes: 7 additions & 2 deletions library/cpp/sandesh_statistics.h
Expand Up @@ -10,7 +10,7 @@

class SandeshMessageStatistics {
public:
SandeshMessageStatistics() {}
SandeshMessageStatistics() {deleted_ = false;}

void UpdateSend(const std::string &msg_name, uint64_t bytes);
void UpdateSendFailed(const std::string &msg_name, uint64_t bytes,
Expand All @@ -35,8 +35,10 @@ class SandeshMessageStatistics {

void Get(BasicStatsList *v_basic_type_stats,
SandeshMessageBasicStats *basic_agg_stats) const;
void Shutdown();

private:
bool deleted_;
void UpdateInternal(const std::string &msg_name,
uint64_t bytes, bool is_tx, bool dropped,
SandeshTxDropReason::type send_dreason,
Expand All @@ -48,14 +50,17 @@ class SandeshMessageStatistics {

// Statistics for state machine events. Holds one SandeshStateMachineEvStats
// entry per event name in event_stats_, plus agg_stats_ (presumably the
// aggregate across all events -- confirm against Update()).
class SandeshEventStatistics {
public:
// NOTE(review): two default-constructor definitions appear back to back
// here; this looks like the removed and added lines of a diff shown
// together. Only the second one initializes deleted_ -- confirm which one
// the real header contains.
SandeshEventStatistics() {}
SandeshEventStatistics() {deleted_ = false;}

// Records an event under event_name; returns early if deleted_ is set.
void Update(std::string &event_name, bool enqueue, bool fail);
// Appends the stored per-event stats to *ev_stats; returns early if
// deleted_ is set.
void Get(std::vector<SandeshStateMachineEvStats> *ev_stats) const;
// Sets deleted_ so that later Update()/Get() calls return early.
void Shutdown();

// Map from event name to its stats entry; the ptr_map holds the entries
// by pointer.
typedef boost::ptr_map<std::string, SandeshStateMachineEvStats> EventStatsMap;
EventStatsMap event_stats_;
SandeshStateMachineEvStats agg_stats_;
private:
// True once Shutdown() has been called.
bool deleted_;
};

#endif // __SANDESH_STATISTICS_H__

0 comments on commit 8e9f02f

Please sign in to comment.