Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes #14924: Cleanup unreferenced software #2231

Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@

package com.normation.inventory.services.core

import com.normation.inventory.domain.{NodeId, SoftwareUuid, Software, InventoryStatus}
import com.normation.inventory.domain.{InventoryStatus, NodeId, Software, SoftwareUuid}
import net.liftweb.common.Box

trait ReadOnlySoftwareDAO {
Expand All @@ -48,4 +48,22 @@ trait ReadOnlySoftwareDAO {
* as possible
*/
def getSoftwareByNode(nodeIds: Set[NodeId], status: InventoryStatus): Box[Map[NodeId, Seq[Software]]]

/**
* Returns all software ids in ou=Software,ou=Inventories
*/
def getAllSoftwareIds() : Box[Set[SoftwareUuid]]

/**
* Returns all software ids pointed by at least a node (in any of the 3 DIT)
*/
def getSoftwaresForAllNodes() : Box[Set[SoftwareUuid]]
}

trait WriteOnlySoftwareDAO {

/**
* Delete softwares in ou=Software,ou=Inventories
*/
def deleteSoftwares(softwares: Seq[SoftwareUuid]): Box[Seq[String]]
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,19 +38,23 @@
package com.normation.inventory.ldap.core

import com.normation.ldap.sdk._
import BuildFilter.{EQ,OR}
import BuildFilter.{EQ, IS, OR}
import com.normation.inventory.domain._
import com.normation.inventory.services.core.ReadOnlySoftwareDAO
import com.normation.inventory.services.core._
import LDAPConstants._
import net.liftweb.common._
import Box._
import com.normation.utils.Control.sequence
import com.normation.utils.Control.{bestEffort, sequence}
import com.unboundid.ldap.sdk.{DN, Filter}

import scala.collection.GenTraversableOnce


class ReadOnlySoftwareDAOImpl(
inventoryDitService:InventoryDitService,
ldap:LDAPConnectionProvider[RoLDAPConnection],
mapper:InventoryMapper
) extends ReadOnlySoftwareDAO {
) extends ReadOnlySoftwareDAO with Loggable {

private[this] def search(con: RoLDAPConnection, ids: Seq[SoftwareUuid]) = {
sequence(con.searchOne(inventoryDitService.getSoftwareBaseDN, OR(ids map {x:SoftwareUuid => EQ(A_SOFTWARE_UUID,x.value) }:_*))) { entry =>
Expand All @@ -76,9 +80,11 @@ class ReadOnlySoftwareDAOImpl(

val dit = inventoryDitService.getDit(status)

val orFilter = BuildFilter.OR(nodeIds.toSeq.map(x => EQ(A_NODE_UUID,x.value)):_*)

for {
con <- ldap
nodeEntries = con.searchOne(dit.NODES.dn, BuildFilter.ALL, Seq(A_NODE_UUID, A_SOFTWARE_DN):_*)
nodeEntries = con.searchOne(dit.NODES.dn, orFilter, Seq(A_NODE_UUID, A_SOFTWARE_DN):_*)
softwareByNode = (nodeEntries.flatMap { e =>
e(A_NODE_UUID).map { id =>
(NodeId(id), e.valuesFor(A_SOFTWARE_DN).flatMap(dn => inventoryDitService.getDit(AcceptedInventory).SOFTWARE.SOFT.idFromDN(dn)))
Expand All @@ -90,4 +96,98 @@ class ReadOnlySoftwareDAOImpl(
softwareByNode.mapValues { ids => software.filter(s => ids.contains(s.id)) }
}
}

def getAllSoftwareIds() : Box[Set[SoftwareUuid]] = {
for {
con <- ldap
softwareEntry = con.searchOne(inventoryDitService.getSoftwareBaseDN, IS(OC_SOFTWARE), A_SOFTWARE_UUID)
ids <- sequence(softwareEntry) { entry =>
entry(A_SOFTWARE_UUID) match {
case Some(value) => Full(value)
case _ => Failure(s"Missing attribute ${A_SOFTWARE_UUID} for entry ${entry.dn} ${entry.toString()}")
}
}
softIds = ids.map(id => SoftwareUuid(id)).toSet
} yield {
softIds
}
}

def getSoftwaresForAllNodes() : Box[Set[SoftwareUuid]] = {
// fetch all softwares, for all nodes, in all 3 dits
val acceptedDit = inventoryDitService.getDit(AcceptedInventory)

var mutSetSoftwares: Box[scala.collection.mutable.Set[SoftwareUuid]] = Full(scala.collection.mutable.Set[SoftwareUuid]())

val t1 = System.currentTimeMillis
for {
con <- ldap

// fetch all nodes
nodes = con.searchSub(acceptedDit.NODES.dn.getParent, IS(OC_NODE), A_NODE_UUID)

batchedNodes = nodes.grouped(50)

_ = batchedNodes.foreach { nodeEntries: Seq[LDAPEntry] =>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it should not be a foreach, else we loose the maybe Failure: res <- sequence(batchedNodes) { .... }

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i tried using a sequence on batchedNodes, without success (it's an iterator, I cannot sequence it)

Copy link
Member

@fanf fanf Jun 3, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can do:

      batchedNodes         = con.searchSub(acceptedDit.NODES.dn.getParent, IS(OC_NODE), A_NODE_UUID).grouped(50).toSeq

      res <- sequence(batchedNodes) { nodeEntries: Seq[LDAPEntry] =>

You still process nodes 50 by 50, and don't hold more node ids than previously

val nodeIds = nodeEntries.flatMap(_(A_NODE_UUID)).map(NodeId(_))

val t2 = System.currentTimeMillis
val orFilter = BuildFilter.OR(nodeIds.map(x => EQ(A_NODE_UUID, x.value)): _*)
val softwareEntry= con.searchSub(acceptedDit.NODES.dn.getParent, orFilter, A_SOFTWARE_DN)
val ids = softwareEntry.flatMap(entry => entry.valuesFor(A_SOFTWARE_DN).toSet )
val results = sequence(ids) { id => acceptedDit.SOFTWARE.SOFT.idFromDN(new DN(id)) }
val t3 = System.currentTimeMillis()
logger.debug(s"Software DNs from 50 nodes fetched in ${t3-t2}ms")
results match {
case Full(softIds) =>
mutSetSoftwares = mutSetSoftwares.map(t => t ++ softIds)
Full(Unit)
case Failure(msg, exception, chain) => mutSetSoftwares = Failure(msg, exception, chain) // otherwise the time is wrong
}
}
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this ain't pretty, yet its more efficient
[2019-05-23 13:00:06] DEBUG com.normation.inventory.ldap.core.SoftwareServiceImpl - All softwares id in nodes fetched: 37953 softwares id in 10029ms, compared to what is in the TODO below
[2019-05-23 12:55:17] DEBUG com.normation.inventory.ldap.core.SoftwareServiceImpl - All softwares id in nodes fetched: 37953 softwares id in 15197ms

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this code is surely non idiomatic, and i fail to improve it



} yield {
mutSetSoftwares
}
mutSetSoftwares.map(x => x.toSet)

/*
// TODO: This needs pagination, with 1000 nodes, it uses about 1,5 GB
softwareEntry = con.searchSub(acceptedDit.NODES.dn, IS(OC_NODE), A_SOFTWARE_DN) // it's really a dn that is stored in software attribute
t2 = System.currentTimeMillis()
_ = logger.debug(s"All Software DNs from all nodes ${softwareEntry.size} fetched in ${t2-t1}ms")

// This could be more efficient, as it takes 3 secodes to process
ids = softwareEntry.flatMap( entry => entry.valuesFor(A_SOFTWARE_DN).toSet )
t3 = System.currentTimeMillis()
_ = logger.debug(s"All software DNs deduplicated, resulting in ${ids.size} software entries, in ${t3-t2}ms")

softIds <- sequence(ids.toSeq) { id => acceptedDit.SOFTWARE.SOFT.idFromDN(new DN(id)) }
t4 = System.currentTimeMillis()
_ = logger.trace(s"All software DNs to software ids in ${t4-t3}ms")
} yield {
softIds.toSet
}*/
}

}

class WriteOnlySoftwareDAOImpl(
inventoryDit :InventoryDit
, ldap :LDAPConnectionProvider[RwLDAPConnection]
) extends WriteOnlySoftwareDAO {


def deleteSoftwares(softwareIds: Seq[SoftwareUuid]): Box[Seq[String]] = {
for {
con <- ldap
dns = softwareIds.map(inventoryDit.SOFTWARE.SOFT.dn(_))
res <- bestEffort(dns) { dn =>
con.delete(dn)
}
} yield {
res.flatten.map( entry => entry.getDN)
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
package com.normation.inventory.ldap.core


import com.normation.inventory.services.core.{ReadOnlySoftwareDAO, WriteOnlySoftwareDAO}
import net.liftweb.common.Loggable
import net.liftweb.common._


trait SoftwareService {
def deleteUnreferencedSoftware() : Box[Seq[String]]
}

class SoftwareServiceImpl(
readOnlySoftware : ReadOnlySoftwareDAO
, writeOnlySoftware : WriteOnlySoftwareDAO)
extends SoftwareService with Loggable {

/** Delete all unreferenced softwares
* First search in software, and then in nodes, so that if a node arrives in between (new inventory)
* its software wont be deleted
*/
def deleteUnreferencedSoftware() : Box[Seq[String]] = {
val t1 = System.currentTimeMillis
for {
allSoftwares <- readOnlySoftware.getAllSoftwareIds()
t2 = System.currentTimeMillis()
_ = logger.debug(s"All softwares id in ou=software fetched: ${allSoftwares.size} softwares id in ${t2-t1}ms")

allNodesSoftwares <- readOnlySoftware.getSoftwaresForAllNodes()
t3 = System.currentTimeMillis()
_ = logger.debug(s"All softwares id in nodes fetched: ${allNodesSoftwares.size} softwares id in ${t3-t2}ms")


extraSoftware = allSoftwares -- allNodesSoftwares
_ = logger.debug(s"Found ${extraSoftware.size} unreferenced software in ou=software, going to delete them")

deletedSoftware <- writeOnlySoftware.deleteSoftwares(extraSoftware.toSeq)
t4 = System.currentTimeMillis()
_ = logger.debug(s"Deleted ${deletedSoftware.size} software in ${t4-t3}ms")


} yield {
deletedSoftware.map(x => x.toString).toSeq
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,14 @@ cn: Software 2
softwareVersion: 2.0-rc
description: Second software

dn: softwareId=soft2,ou=Software,ou=Inventories,cn=rudder-configuration
softwareId: soft2
objectClass: software
objectClass: top
cn: Software 3
softwareVersion: 3.0-rc
description: Third software, unreferenced

###################################################################################################
# Nodes #
###################################################################################################
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ import org.specs2.runner._
import com.normation.ldap.listener.InMemoryDsConnectionProvider
import com.unboundid.ldap.sdk.DN
import com.normation.inventory.domain._
import com.normation.ldap.sdk.RwLDAPConnection
import com.normation.ldap.sdk.{RoLDAPConnection, RwLDAPConnection}
import net.liftweb.common.Empty
import net.liftweb.common.Full
import net.liftweb.common.EmptyBox
Expand Down Expand Up @@ -93,6 +93,11 @@ class TestInventory extends Specification {
, bootstrapLDIFPaths = bootstrapLDIFs
)

val roLdap = InMemoryDsConnectionProvider[RoLDAPConnection](
baseDNs = baseDN :: Nil
, schemaLDIFPaths = schemaLDIFs
, bootstrapLDIFPaths = bootstrapLDIFs
)

val softwareDN = new DN("ou=Inventories, cn=rudder-configuration")

Expand All @@ -117,6 +122,12 @@ class TestInventory extends Specification {

val repo = new FullInventoryRepositoryImpl(inventoryDitService, inventoryMapper, ldap)

val readOnlySoftware = new ReadOnlySoftwareDAOImpl(inventoryDitService, roLdap, inventoryMapper)

val writeOnlySoftware = new WriteOnlySoftwareDAOImpl(acceptedNodesDitImpl, ldap)


val softwareService = new SoftwareServiceImpl(readOnlySoftware, writeOnlySoftware)

val allStatus = Seq(RemovedInventory, PendingInventory, AcceptedInventory)

Expand Down Expand Up @@ -400,6 +411,22 @@ class TestInventory extends Specification {

}

"Softwares" should {
"Find 2 software referenced by nodes with the repository" in {
val softwares = readOnlySoftware.getSoftwaresForAllNodes()
softwares.isOK and softwares.map(_.size) == Full(2)
}

"Find 3 software in ou=software with the repository" in {
val softwares = readOnlySoftware.getAllSoftwareIds()
softwares.isOK and softwares.map(_.size) == Full(3)
}

"Purge one unreferenced software with the SoftwareService" in {
val purgedSoftwares = softwareService.deleteUnreferencedSoftware()
purgedSoftwares.isOK and purgedSoftwares.map(_.size) == Full(1)
}
}

step {
ldap.close
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/*
*************************************************************************************
* Copyright 2017 Normation SAS
*************************************************************************************
*
* This file is part of Rudder.
*
* Rudder is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* In accordance with the terms of section 7 (7. Additional Terms.) of
* the GNU General Public License version 3, the copyright holders add
* the following Additional permissions:
* Notwithstanding to the terms of section 5 (5. Conveying Modified Source
* Versions) and 6 (6. Conveying Non-Source Forms.) of the GNU General
* Public License version 3, when you create a Related Module, this
* Related Module is not considered as a part of the work and may be
* distributed under the license agreement of your choice.
* A "Related Module" means a set of sources files including their
* documentation that, without modification of the Source Code, enables
* supplementary functions or services in addition to those offered by
* the Software.
*
* Rudder is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Rudder. If not, see <http://www.gnu.org/licenses/>.

*
*************************************************************************************
*/

package com.normation.rudder.batch


import com.normation.rudder.domain.logger.ScheduledJobLogger
import com.normation.inventory.ldap.core.SoftwareService
import monix.execution.Scheduler.{global => scheduler}
import net.liftweb.common._

import scala.concurrent.duration._


/**
* A naive scheduler which checks every updateInterval if software needs to be deleted
*/
class PurgeUnreferencedSoftwares(
softwareService : SoftwareService
, updateInterval : FiniteDuration
) {

val logger = ScheduledJobLogger


if (updateInterval < 1.hour) {
logger.info(s"Disable automatic purge of unreferenced softwares (update interval cannot be less than 1 hour)")
} else {
logger.debug(s"***** starting batch that purge unreferenced softwares, every ${updateInterval.toString()} *****")
scheduler.scheduleWithFixedDelay(12.second, updateInterval) {
softwareService.deleteUnreferencedSoftware() match {
case Full(softwares) =>
logger.info(s"Purged ${softwares.length} unreferenced softwares")
if (logger.isDebugEnabled && softwares.length > 0)
logger.debug(s"Purged following software: ${softwares.mkString(",")}")
case e: EmptyBox =>
val error = (e ?~! s"Error when deleting unreferenced softwares")
logger.error(error.messageChain)
error.rootExceptionCause.foreach(ex =>
logger.error("Exception was:", ex)
)
}
}
}
}

Loading