-
Notifications
You must be signed in to change notification settings - Fork 73
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fixes #14924: Cleanup unreferenced software #2231
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -38,19 +38,23 @@ | |
package com.normation.inventory.ldap.core | ||
|
||
import com.normation.ldap.sdk._ | ||
import BuildFilter.{EQ,OR} | ||
import BuildFilter.{EQ, IS, OR} | ||
import com.normation.inventory.domain._ | ||
import com.normation.inventory.services.core.ReadOnlySoftwareDAO | ||
import com.normation.inventory.services.core._ | ||
import LDAPConstants._ | ||
import net.liftweb.common._ | ||
import Box._ | ||
import com.normation.utils.Control.sequence | ||
import com.normation.utils.Control.{bestEffort, sequence} | ||
import com.unboundid.ldap.sdk.{DN, Filter} | ||
|
||
import scala.collection.GenTraversableOnce | ||
|
||
|
||
class ReadOnlySoftwareDAOImpl( | ||
inventoryDitService:InventoryDitService, | ||
ldap:LDAPConnectionProvider[RoLDAPConnection], | ||
mapper:InventoryMapper | ||
) extends ReadOnlySoftwareDAO { | ||
) extends ReadOnlySoftwareDAO with Loggable { | ||
|
||
private[this] def search(con: RoLDAPConnection, ids: Seq[SoftwareUuid]) = { | ||
sequence(con.searchOne(inventoryDitService.getSoftwareBaseDN, OR(ids map {x:SoftwareUuid => EQ(A_SOFTWARE_UUID,x.value) }:_*))) { entry => | ||
|
@@ -76,9 +80,11 @@ class ReadOnlySoftwareDAOImpl( | |
|
||
val dit = inventoryDitService.getDit(status) | ||
|
||
val orFilter = BuildFilter.OR(nodeIds.toSeq.map(x => EQ(A_NODE_UUID,x.value)):_*) | ||
|
||
for { | ||
con <- ldap | ||
nodeEntries = con.searchOne(dit.NODES.dn, BuildFilter.ALL, Seq(A_NODE_UUID, A_SOFTWARE_DN):_*) | ||
nodeEntries = con.searchOne(dit.NODES.dn, orFilter, Seq(A_NODE_UUID, A_SOFTWARE_DN):_*) | ||
softwareByNode = (nodeEntries.flatMap { e => | ||
e(A_NODE_UUID).map { id => | ||
(NodeId(id), e.valuesFor(A_SOFTWARE_DN).flatMap(dn => inventoryDitService.getDit(AcceptedInventory).SOFTWARE.SOFT.idFromDN(dn))) | ||
|
@@ -90,4 +96,98 @@ class ReadOnlySoftwareDAOImpl( | |
softwareByNode.mapValues { ids => software.filter(s => ids.contains(s.id)) } | ||
} | ||
} | ||
|
||
def getAllSoftwareIds() : Box[Set[SoftwareUuid]] = { | ||
for { | ||
con <- ldap | ||
softwareEntry = con.searchOne(inventoryDitService.getSoftwareBaseDN, IS(OC_SOFTWARE), A_SOFTWARE_UUID) | ||
ids <- sequence(softwareEntry) { entry => | ||
entry(A_SOFTWARE_UUID) match { | ||
case Some(value) => Full(value) | ||
case _ => Failure(s"Missing attribute ${A_SOFTWARE_UUID} for entry ${entry.dn} ${entry.toString()}") | ||
} | ||
} | ||
softIds = ids.map(id => SoftwareUuid(id)).toSet | ||
} yield { | ||
softIds | ||
} | ||
} | ||
|
||
def getSoftwaresForAllNodes() : Box[Set[SoftwareUuid]] = { | ||
// fetch all softwares, for all nodes, in all 3 dits | ||
val acceptedDit = inventoryDitService.getDit(AcceptedInventory) | ||
|
||
var mutSetSoftwares: Box[scala.collection.mutable.Set[SoftwareUuid]] = Full(scala.collection.mutable.Set[SoftwareUuid]()) | ||
|
||
val t1 = System.currentTimeMillis | ||
for { | ||
con <- ldap | ||
|
||
// fetch all nodes | ||
nodes = con.searchSub(acceptedDit.NODES.dn.getParent, IS(OC_NODE), A_NODE_UUID) | ||
|
||
batchedNodes = nodes.grouped(50) | ||
|
||
_ = batchedNodes.foreach { nodeEntries: Seq[LDAPEntry] => | ||
val nodeIds = nodeEntries.flatMap(_(A_NODE_UUID)).map(NodeId(_)) | ||
|
||
val t2 = System.currentTimeMillis | ||
val orFilter = BuildFilter.OR(nodeIds.map(x => EQ(A_NODE_UUID, x.value)): _*) | ||
val softwareEntry= con.searchSub(acceptedDit.NODES.dn.getParent, orFilter, A_SOFTWARE_DN) | ||
val ids = softwareEntry.flatMap(entry => entry.valuesFor(A_SOFTWARE_DN).toSet ) | ||
val results = sequence(ids) { id => acceptedDit.SOFTWARE.SOFT.idFromDN(new DN(id)) } | ||
val t3 = System.currentTimeMillis() | ||
logger.debug(s"Software DNs from 50 nodes fetched in ${t3-t2}ms") | ||
results match { | ||
case Full(softIds) => | ||
mutSetSoftwares = mutSetSoftwares.map(t => t ++ softIds) | ||
Full(Unit) | ||
case Failure(msg, exception, chain) => mutSetSoftwares = Failure(msg, exception, chain) // otherwise the time is wrong | ||
} | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this ain't pretty, yet its more efficient There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this code is surely non idiomatic, and i fail to improve it |
||
|
||
|
||
} yield { | ||
mutSetSoftwares | ||
} | ||
mutSetSoftwares.map(x => x.toSet) | ||
|
||
/* | ||
// TODO: This needs pagination, with 1000 nodes, it uses about 1,5 GB | ||
softwareEntry = con.searchSub(acceptedDit.NODES.dn, IS(OC_NODE), A_SOFTWARE_DN) // it's really a dn that is stored in software attribute | ||
t2 = System.currentTimeMillis() | ||
_ = logger.debug(s"All Software DNs from all nodes ${softwareEntry.size} fetched in ${t2-t1}ms") | ||
|
||
// This could be more efficient, as it takes 3 secodes to process | ||
ids = softwareEntry.flatMap( entry => entry.valuesFor(A_SOFTWARE_DN).toSet ) | ||
t3 = System.currentTimeMillis() | ||
_ = logger.debug(s"All software DNs deduplicated, resulting in ${ids.size} software entries, in ${t3-t2}ms") | ||
|
||
softIds <- sequence(ids.toSeq) { id => acceptedDit.SOFTWARE.SOFT.idFromDN(new DN(id)) } | ||
t4 = System.currentTimeMillis() | ||
_ = logger.trace(s"All software DNs to software ids in ${t4-t3}ms") | ||
} yield { | ||
softIds.toSet | ||
}*/ | ||
} | ||
|
||
} | ||
|
||
class WriteOnlySoftwareDAOImpl( | ||
inventoryDit :InventoryDit | ||
, ldap :LDAPConnectionProvider[RwLDAPConnection] | ||
) extends WriteOnlySoftwareDAO { | ||
|
||
|
||
def deleteSoftwares(softwareIds: Seq[SoftwareUuid]): Box[Seq[String]] = { | ||
for { | ||
con <- ldap | ||
dns = softwareIds.map(inventoryDit.SOFTWARE.SOFT.dn(_)) | ||
res <- bestEffort(dns) { dn => | ||
con.delete(dn) | ||
} | ||
} yield { | ||
res.flatten.map( entry => entry.getDN) | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
package com.normation.inventory.ldap.core | ||
|
||
|
||
import com.normation.inventory.services.core.{ReadOnlySoftwareDAO, WriteOnlySoftwareDAO} | ||
import net.liftweb.common.Loggable | ||
import net.liftweb.common._ | ||
|
||
|
||
trait SoftwareService { | ||
def deleteUnreferencedSoftware() : Box[Seq[String]] | ||
} | ||
|
||
class SoftwareServiceImpl( | ||
readOnlySoftware : ReadOnlySoftwareDAO | ||
, writeOnlySoftware : WriteOnlySoftwareDAO) | ||
extends SoftwareService with Loggable { | ||
|
||
/** Delete all unreferenced softwares | ||
* First search in software, and then in nodes, so that if a node arrives in between (new inventory) | ||
* its software wont be deleted | ||
*/ | ||
def deleteUnreferencedSoftware() : Box[Seq[String]] = { | ||
val t1 = System.currentTimeMillis | ||
for { | ||
allSoftwares <- readOnlySoftware.getAllSoftwareIds() | ||
t2 = System.currentTimeMillis() | ||
_ = logger.debug(s"All softwares id in ou=software fetched: ${allSoftwares.size} softwares id in ${t2-t1}ms") | ||
|
||
allNodesSoftwares <- readOnlySoftware.getSoftwaresForAllNodes() | ||
t3 = System.currentTimeMillis() | ||
_ = logger.debug(s"All softwares id in nodes fetched: ${allNodesSoftwares.size} softwares id in ${t3-t2}ms") | ||
|
||
|
||
extraSoftware = allSoftwares -- allNodesSoftwares | ||
_ = logger.debug(s"Found ${extraSoftware.size} unreferenced software in ou=software, going to delete them") | ||
|
||
deletedSoftware <- writeOnlySoftware.deleteSoftwares(extraSoftware.toSeq) | ||
t4 = System.currentTimeMillis() | ||
_ = logger.debug(s"Deleted ${deletedSoftware.size} software in ${t4-t3}ms") | ||
|
||
|
||
} yield { | ||
deletedSoftware.map(x => x.toString).toSeq | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
/* | ||
************************************************************************************* | ||
* Copyright 2017 Normation SAS | ||
************************************************************************************* | ||
* | ||
* This file is part of Rudder. | ||
* | ||
* Rudder is free software: you can redistribute it and/or modify | ||
* it under the terms of the GNU General Public License as published by | ||
* the Free Software Foundation, either version 3 of the License, or | ||
* (at your option) any later version. | ||
* | ||
* In accordance with the terms of section 7 (7. Additional Terms.) of | ||
* the GNU General Public License version 3, the copyright holders add | ||
* the following Additional permissions: | ||
* Notwithstanding to the terms of section 5 (5. Conveying Modified Source | ||
* Versions) and 6 (6. Conveying Non-Source Forms.) of the GNU General | ||
* Public License version 3, when you create a Related Module, this | ||
* Related Module is not considered as a part of the work and may be | ||
* distributed under the license agreement of your choice. | ||
* A "Related Module" means a set of sources files including their | ||
* documentation that, without modification of the Source Code, enables | ||
* supplementary functions or services in addition to those offered by | ||
* the Software. | ||
* | ||
* Rudder is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
* GNU General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU General Public License | ||
* along with Rudder. If not, see <http://www.gnu.org/licenses/>. | ||
|
||
* | ||
************************************************************************************* | ||
*/ | ||
|
||
package com.normation.rudder.batch | ||
|
||
|
||
import com.normation.rudder.domain.logger.ScheduledJobLogger | ||
import com.normation.inventory.ldap.core.SoftwareService | ||
import monix.execution.Scheduler.{global => scheduler} | ||
import net.liftweb.common._ | ||
|
||
import scala.concurrent.duration._ | ||
|
||
|
||
/** | ||
* A naive scheduler which checks every updateInterval if software needs to be deleted | ||
*/ | ||
class PurgeUnreferencedSoftwares( | ||
softwareService : SoftwareService | ||
, updateInterval : FiniteDuration | ||
) { | ||
|
||
val logger = ScheduledJobLogger | ||
|
||
|
||
if (updateInterval < 1.hour) { | ||
logger.info(s"Disable automatic purge of unreferenced softwares (update interval cannot be less than 1 hour)") | ||
} else { | ||
logger.debug(s"***** starting batch that purge unreferenced softwares, every ${updateInterval.toString()} *****") | ||
scheduler.scheduleWithFixedDelay(12.second, updateInterval) { | ||
softwareService.deleteUnreferencedSoftware() match { | ||
case Full(softwares) => | ||
logger.info(s"Purged ${softwares.length} unreferenced softwares") | ||
if (logger.isDebugEnabled && softwares.length > 0) | ||
logger.debug(s"Purged following software: ${softwares.mkString(",")}") | ||
case e: EmptyBox => | ||
val error = (e ?~! s"Error when deleting unreferenced softwares") | ||
logger.error(error.messageChain) | ||
error.rootExceptionCause.foreach(ex => | ||
logger.error("Exception was:", ex) | ||
) | ||
} | ||
} | ||
} | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
it should not be a foreach, else we loose the maybe
Failure
:res <- sequence(batchedNodes) { .... }
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
i tried using a sequence on batchedNodes, without success (it's an iterator, I cannot sequence it)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can do:
You still process nodes 50 by 50, and don't hold more node ids than previously