Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes #8409: When a node send reports with an unknow configID, no expected reports are showned #1118

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,10 @@ object ComplianceDebugLogger extends Logger {
implicit class RunAndConfigInfoToLog(c: RunAndConfigInfo) {

val logDetails: String = c match {
case NoRunNoInit =>
case NoRunNoExpectedReport =>
"expected NodeConfigId: not found | last run: not found"

case VersionNotFound(lastRunDateTime, lastRunConfigId) =>
case NoExpectedReport(lastRunDateTime, lastRunConfigId) =>
s"expected NodeConfigId: not found |"+
s" last run: nodeConfigId: ${lastRunConfigId.fold("none")(_.value)} received at ${lastRunDateTime}"

Expand All @@ -99,31 +99,31 @@ object ComplianceDebugLogger extends Logger {
s"until ${expirationDateTime} expected NodeConfigId: ${expectedConfigId.toLog} |"+
s" last run: ${optLastRun.fold("none (or too old)")(x => s"nodeConfigId: ${x._2.toLog} received at ${x._1}")}"

case UnexpectedVersion(lastRunDateTime, lastRunConfigId, lastRunExpiration, expectedConfigId, expectedExpiration) =>
case UnexpectedVersion(lastRunDateTime, Some(lastRunConfigInfo), lastRunExpiration, expectedConfigId, expectedExpiration) =>
s"expected NodeConfigId: ${expectedConfigId.toLog} |"+
s" last run: nodeConfigId: ${lastRunConfigId.toLog} received at ${lastRunDateTime} |"+
s" last run: nodeConfigInfo: ${lastRunConfigInfo.toLog} received at ${lastRunDateTime} |"+
s" expired at ${lastRunExpiration}"

case UnexpectedNoVersion(lastRunDateTime, lastRunConfigId, lastRunExpiration, expectedConfigId, expectedExpiration) =>
s"expected NodeConfigId: ${expectedConfigId.toLog} |"+
case UnexpectedNoVersion(lastRunDateTime, Some(lastRunConfigInfo), lastRunExpiration, expectedConfigInfo, expectedExpiration) =>
s"expected NodeConfigId: ${expectedConfigInfo.toLog} |"+
s" last run: no configId, received at ${lastRunDateTime} |"+
s" expired at ${lastRunExpiration}"

case ComputeCompliance(lastRunDateTime, lastRunConfigId, expectedConfigId, expirationDateTime, missingStatus) =>
s"expected NodeConfigId: ${expectedConfigId.toLog} |"+
s" last run: nodeConfigId: ${lastRunConfigId.toLog} received at ${lastRunDateTime} |"+
case x@ComputeCompliance(lastRunDateTime, expectedConfigInfo, expirationDateTime, missingStatus) =>
s"expected NodeConfigId: ${expectedConfigInfo.toLog} |"+
s" last run: nodeConfigId: ${x.lastRunConfigId.value} received at ${lastRunDateTime} |"+
s" expired at ${expirationDateTime}"

}

val logName = c match {
case NoRunNoInit => "NoRunNoInit"
case _: VersionNotFound => "VersionNotFound"
case _: NoReportInInterval => "NoReportInInterval"
case _: Pending => "Pending"
case _: UnexpectedVersion => "UnexpectedVersion"
case _: UnexpectedNoVersion => "UnexpectedNoVersion"
case _: ComputeCompliance => "ComputeCompliance"
case NoRunNoExpectedReport => "NoRunNoRunNoExpectedReport"
case _: NoExpectedReport => "NoRunNoExpectedReport"
case _: NoReportInInterval => "NoReportInInterval"
case _: Pending => "Pending"
case _: UnexpectedVersion => "UnexpectedVersion"
case _: UnexpectedNoVersion => "UnexpectedNoVersion"
case _: ComputeCompliance => "ComputeCompliance"
}

val toLog: String = logName + ": " + logDetails
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ sealed trait RunAndConfigInfo
sealed trait ErrorNoConfigData extends RunAndConfigInfo

sealed trait ExpectedConfigAvailable extends RunAndConfigInfo {
def expectedConfigId: NodeConfigIdInfo
def expectedConfigInfo: NodeConfigIdInfo
}

sealed trait NoReport extends ExpectedConfigAvailable
Expand All @@ -128,9 +128,16 @@ sealed trait LastRunAvailable extends RunAndConfigInfo {
//config id because we must have some logic to find WHAT
//is the applicable version in all case - it's one of the
//major goal of ExecutionBatch#computeNodeRunInfo
def lastRunConfigId: NodeConfigIdInfo
def lastRunConfigId: NodeConfigId

// most of the time, we do have the corresponding
// configId in base. But not always, for example
// if the node configId was corrupted
def lastRunConfigInfo: Option[NodeConfigIdInfo]
}



/**
* The date and time when the status expires.
* Depends of the compliance mode and the heartbeat period
Expand All @@ -151,7 +158,7 @@ sealed trait ExpiringStatus extends RunAndConfigInfo {
/*
* Really, that node exists ?
*/
case object NoRunNoInit extends ErrorNoConfigData
case object NoRunNoExpectedReport extends ErrorNoConfigData

/*
* We don't have the needed configId in the expected
Expand All @@ -160,7 +167,7 @@ case object NoRunNoInit extends ErrorNoConfigData
* (it is some weird data lost in the server, or a node
* not yet initialized)
*/
case class VersionNotFound(
case class NoExpectedReport(
lastRunDateTime: DateTime
, lastRunConfigId: Option[NodeConfigId]
) extends ErrorNoConfigData
Expand All @@ -171,13 +178,13 @@ case class VersionNotFound(
* some but too old for our situation)
*/
case class NoReportInInterval(
expectedConfigId: NodeConfigIdInfo
expectedConfigInfo: NodeConfigIdInfo
) extends NoReport



case class Pending(
expectedConfigId : NodeConfigIdInfo
expectedConfigInfo : NodeConfigIdInfo
, optLastRun : Option[(DateTime, NodeConfigIdInfo)]
, expirationDateTime : DateTime
, missingReportStatus: ReportType
Expand All @@ -191,11 +198,13 @@ case class Pending(
*/
case class UnexpectedVersion(
lastRunDateTime : DateTime
, lastRunConfigId : NodeConfigIdInfo
, lastRunConfigInfo : Some[NodeConfigIdInfo]
, lastRunExpiration : DateTime
, expectedConfigId : NodeConfigIdInfo
, expectedConfigInfo: NodeConfigIdInfo
, expectedExpiration: DateTime
) extends Unexpected with LastRunAvailable
) extends Unexpected with LastRunAvailable {
val lastRunConfigId = lastRunConfigInfo.get.configId
}

/**
* A case where we have a run without version,
Expand All @@ -204,20 +213,39 @@ case class UnexpectedVersion(
*/
case class UnexpectedNoVersion(
lastRunDateTime : DateTime
, lastRunConfigId : NodeConfigIdInfo
, lastRunConfigInfo : Some[NodeConfigIdInfo]
, lastRunExpiration : DateTime
, expectedConfigId : NodeConfigIdInfo
, expectedConfigInfo: NodeConfigIdInfo
, expectedExpiration: DateTime
) extends Unexpected with LastRunAvailable
) extends Unexpected with LastRunAvailable {
val lastRunConfigId = lastRunConfigInfo.get.configId
}

/**
* A case where we have a run with a version,
* but we didn't find it in db,
* but we really should, because versions are init
* in the server for that node
*/
case class UnexpectedUnknowVersion(
lastRunDateTime : DateTime
, lastRunConfigId : NodeConfigId
, expectedConfigInfo: NodeConfigIdInfo
, expectedExpiration: DateTime
) extends Unexpected with LastRunAvailable {
val lastRunConfigInfo = None
}


case class ComputeCompliance(
lastRunDateTime : DateTime
, lastRunConfigId : NodeConfigIdInfo
, expectedConfigId : NodeConfigIdInfo
, expectedConfigInfo : NodeConfigIdInfo
, expirationDateTime : DateTime
, missingReportStatus: ReportType
) extends Ok with LastRunAvailable with ExpiringStatus with MissingReportStatus
) extends Ok with LastRunAvailable with ExpiringStatus with MissingReportStatus {
val lastRunConfigId = expectedConfigInfo.configId
val lastRunConfigInfo = Some(expectedConfigInfo)
}



Expand Down Expand Up @@ -320,6 +348,7 @@ object ExecutionBatch extends Loggable {
}

val now = DateTime.now

nodeIds.foreach { case (nodeId, runInfos) =>
ComplianceDebugLogger.node(nodeId).debug(s"Node run configuration: ${(nodeId, complianceMode, runInfos).toLog }")
}
Expand All @@ -346,12 +375,12 @@ object ExecutionBatch extends Loggable {

(optRun, optInfo) match {
case (None, None) =>
runType("", NoRunNoInit)
runType("", NoRunNoExpectedReport)

// There is no run for this node
case (None, Some(configs)) =>
if(configs.isEmpty) {
runType("nodeId exists in DB but has no version (due to cleaning?)", NoRunNoInit)
runType("nodeId exists in DB but has no version (due to cleaning?)", NoRunNoExpectedReport)
} else {
val currentConfig = configs.maxBy(_.creation.getMillis)
val expireTime = currentConfig.creation.plus(updateValidityTime(intervalInfo))
Expand Down Expand Up @@ -392,12 +421,12 @@ object ExecutionBatch extends Loggable {
(runToCheck, nodeConfigIdInfos) match {

case (AgentRun(AgentRunId(_, t), optConfigId, _, _), None) =>
runType("need to regenerate policies?", VersionNotFound(t, optConfigId))
runType("need to regenerate policies?", NoExpectedReport(t, optConfigId))

//The run does not have a configId: migration, new nodes, etc.
case (AgentRun(AgentRunId(_, t), None, _, _), Some(configs)) =>
if(configs.isEmpty) {
runType("nodeId exists in DB but has no version (due to cleaning?)", VersionNotFound(t, None))
runType("nodeId exists in DB but has no version (due to cleaning?)", NoExpectedReport(t, None))
} else {
/*
* Here, we want to check two things:
Expand All @@ -415,7 +444,7 @@ object ExecutionBatch extends Loggable {
val currentConfig = configs.maxBy( _.creation.getMillis)
val currentExpiration = currentConfig.creation.plus(updateValidityDuration)
runType(s"node send reports without nodeConfigId but the oldest configId (${oldestConfigId.configId.value}) expired since ${oldestExpiration})"
, UnexpectedNoVersion(t, oldestConfigId, oldestExpiration, currentConfig, currentExpiration)
, UnexpectedNoVersion(t, Some(oldestConfigId), oldestExpiration, currentConfig, currentExpiration)
)
} else {
val currentConfigId = configs.maxBy( _.creation.getMillis )
Expand All @@ -437,17 +466,20 @@ object ExecutionBatch extends Loggable {
if(configs.isEmpty) {
//error: we have a MISSING config id. Contrary to the case where any config id is missing
//for the node, here we have a BAD id.
runType("nodeId exists in DB but has no version (due to cleaning?)", VersionNotFound(t, Some(rv)))
runType("nodeId exists in DB but has no version (due to cleaning?)", NoExpectedReport(t, Some(rv)))
} else {
configs.find { i => i.configId == rv} match {
case None =>
//it's a bad version.
runType(s"nodeId exists in DB and has configId, but not ${rv.value} (due to cleaning?)", VersionNotFound(t, Some(rv)))
//it's a bad version, but we have config id in DB => likelly a corruption on node
val expectedVersion = configs.maxBy( _.creation.getMillis )
//expirationTime is the date after which we must have gotten a report for that version
val expirationTime = expectedVersion.creation.plus(updateValidityDuration)

case Some(v) => //nominal case !
//check if the run is not too old for the version, i.e if endOflife + grace is before run
val currentConfigId = configs.maxBy( _.creation.getMillis )
runType(s"nodeId exists in DB and has configId, expected configId is ${expectedVersion.configId.value}, but ${rv.value} was not found (node corruption?)",
UnexpectedUnknowVersion(t, rv, expectedVersion, expirationTime)
)

case Some(v) => //nominal case !
v.endOfLife match {
case None =>
//nominal (bis)! The node is answering to current config !
Expand All @@ -458,19 +490,22 @@ object ExecutionBatch extends Loggable {
)
} else { //nominal case
runType(s"Last run at ${t} is for the correct configId ${v.configId.value} and not expired, compute compliance"
, ComputeCompliance(t, v, currentConfigId, expirationTime, missingReportType)
, ComputeCompliance(t, v, expirationTime, missingReportType)
)
}

case Some(eol) =>
//check if the run is not too old for the version, i.e if endOflife + grace is before run
val currentConfigId = configs.maxBy( _.creation.getMillis )

// a more recent version exists, so we are either awaiting reports
// for it, or in some error state (completely unexpected version or "just" no report
val eolExpiration = eol.plus(updateValidityDuration)
val expirationTime = currentConfigId.creation.plus(updateValidityDuration)
if(eolExpiration.isBefore(t)) {
//we should have had a more recent run
runType(s"node sent reports at ${t} for configId ${rv.value} (which expired at ${eol}) but should have been for configId ${currentConfigId.configId.value}"
, UnexpectedVersion(t, v, eolExpiration, currentConfigId, expirationTime)
, UnexpectedVersion(t, Some(v), eolExpiration, currentConfigId, expirationTime)
)
} else {
if(expirationTime.isBefore(timeToCompareTo)) {
Expand Down Expand Up @@ -522,15 +557,15 @@ object ExecutionBatch extends Loggable {
}
}

def buildUnexpectedVersion(runTime: DateTime, runVersion: NodeConfigIdInfo, runExpiration: DateTime, expectedVersion: NodeConfigIdInfo, expectedExpiration: DateTime, nodeStatusReports: Seq[ResultReports]) = {
def buildUnexpectedVersion(runTime: DateTime, runVersion: Option[NodeConfigIdInfo], runExpiration: DateTime, expectedVersion: NodeConfigIdInfo, expectedExpiration: DateTime, nodeStatusReports: Seq[ResultReports]) = {
//mark all report of run unexpected,
//all expected missing
buildRuleNodeStatusReport(
MergeInfo(nodeId, Some(runTime), Some(expectedVersion.configId), expectedExpiration)
, getExpectedReports(expectedVersion.configId)
, MissingReportType
) ++
buildUnexpectedReports(MergeInfo(nodeId, Some(runTime), Some(runVersion.configId), runExpiration), nodeStatusReports)
buildUnexpectedReports(MergeInfo(nodeId, Some(runTime), runVersion.map(_.configId), runExpiration), nodeStatusReports)
}

//only interesting reports: for that node, with a status
Expand All @@ -540,12 +575,12 @@ object ExecutionBatch extends Loggable {

val ruleNodeStatusReports = runInfo match {

case ComputeCompliance(lastRunDateTime, lastRunConfigId, expectedConfigId, expirationTime, missingReportStatus) =>
ComplianceDebugLogger.node(nodeId).trace(s"Using merge/compare strategy between last reports from run ${lastRunConfigId.toLog} and expect reports ${expectedConfigId.toLog}")
case ComputeCompliance(lastRunDateTime, expectedConfigId, expirationTime, missingReportStatus) =>
ComplianceDebugLogger.node(nodeId).trace(s"Using merge/compare strategy between last reports from run at ${lastRunDateTime} and expect reports ${expectedConfigId.toLog}")
mergeCompareByRule(
MergeInfo(nodeId, Some(lastRunDateTime), Some(lastRunConfigId.configId), expirationTime)
MergeInfo(nodeId, Some(lastRunDateTime), Some(expectedConfigId.configId), expirationTime)
, nodeStatusReports
, getExpectedReports(lastRunConfigId.configId)
, getExpectedReports(expectedConfigId.configId)
, getExpectedReports(expectedConfigId.configId)
, missingReportStatus
)
Expand Down Expand Up @@ -588,22 +623,26 @@ object ExecutionBatch extends Loggable {
, NoAnswerReportType
)

case UnexpectedVersion(runTime, runVersion, runExpiration, expectedVersion, expectedExpiration) =>
case UnexpectedVersion(runTime, Some(runVersion), runExpiration, expectedVersion, expectedExpiration) =>
ComplianceDebugLogger.node(nodeId).warn(s"Received a run at ${runTime} for node '${nodeId.value}' with configId '${runVersion.configId.value}' but that node should be sending reports for configId ${expectedVersion.configId.value}")
buildUnexpectedVersion(runTime, runVersion, runExpiration, expectedVersion, expectedExpiration, nodeStatusReports)
buildUnexpectedVersion(runTime, Some(runVersion), runExpiration, expectedVersion, expectedExpiration, nodeStatusReports)

case UnexpectedNoVersion(runTime, runVersion, runExpiration, expectedVersion, expectedExpiration) => //same as unextected, different log
case UnexpectedNoVersion(runTime, Some(runVersion), runExpiration, expectedVersion, expectedExpiration) => //same as unextected, different log
ComplianceDebugLogger.node(nodeId).warn(s"Received a run at ${runTime} for node '${nodeId.value}' without any configId but that node should be sending reports for configId ${expectedVersion.configId.value}")
buildUnexpectedVersion(runTime, runVersion, runExpiration, expectedVersion, expectedExpiration, nodeStatusReports)
buildUnexpectedVersion(runTime, Some(runVersion), runExpiration, expectedVersion, expectedExpiration, nodeStatusReports)

case UnexpectedUnknowVersion(runTime, runId, expectedVersion, expectedExpiration) => //same as unextected, different log
ComplianceDebugLogger.node(nodeId).warn(s"Received a run at ${runTime} for node '${nodeId.value}' configId '${runId.value}' which is not known by Rudder, and that node should be sending reports for configId ${expectedVersion.configId.value}")
buildUnexpectedVersion(runTime, None, runTime, expectedVersion, expectedExpiration, nodeStatusReports)

case VersionNotFound(runTime, optConfigId) =>
case NoExpectedReport(runTime, optConfigId) =>
// these reports where not expected
ComplianceDebugLogger.node(nodeId).warn(s"Node '${nodeId.value}' sent reports for run at '${runInfo}' (with ${
optConfigId.map(x => s" configuration ID: '${x.value}'").getOrElse(" no configuration ID")
}). No expected configuration matches these reports.")
buildUnexpectedReports(MergeInfo(nodeId, Some(runTime), optConfigId, END_OF_TIME), nodeStatusReports)

case NoRunNoInit =>
case NoRunNoExpectedReport =>
/*
* Really, this node exists ? Shouldn't we just declare Ragnarök at that point ?
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -274,17 +274,17 @@ trait DefaultFindRuleNodeStatusReports extends ReportingService {
runInfos <- getNodeRunInfos(nodeIds)

// that gives us configId for runs, and expected configId (some may be in both set)
expectedConfigId = runInfos.collect { case (nodeId, x:ExpectedConfigAvailable) => NodeAndConfigId(nodeId, x.expectedConfigId.configId) }
expectedConfigIds = runInfos.collect { case (nodeId, x:ExpectedConfigAvailable) => NodeAndConfigId(nodeId, x.expectedConfigInfo.configId) }
lastrunConfigId = runInfos.collect {
case (nodeId, x:LastRunAvailable) => NodeAndConfigId(nodeId, x.lastRunConfigId.configId)
case (nodeId, x:LastRunAvailable) => NodeAndConfigId(nodeId, x.lastRunConfigId)
case (nodeId, Pending(_, Some(run), _, _)) => NodeAndConfigId(nodeId, run._2.configId)
}

t1 = System.currentTimeMillis
_ = TimingDebugLogger.debug(s"Compliance: get run infos: ${t1-t0}ms")

// so now, get all expected reports for these config id
allExpectedReports <- confExpectedRepo.getExpectedReports(expectedConfigId.toSet++lastrunConfigId, ruleIds)
allExpectedReports <- confExpectedRepo.getExpectedReports(expectedConfigIds.toSet++lastrunConfigId, ruleIds)

t2 = System.currentTimeMillis
_ = TimingDebugLogger.debug(s"Compliance: get expected reports: ${t2-t1}ms")
Expand Down
Loading