Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simplify and speed up VulnerabilityMetricsUpdateTask #2481

Merged
merged 9 commits into from Feb 20, 2023
11 changes: 11 additions & 0 deletions src/main/java/org/dependencytrack/model/VulnerabilityMetrics.java
Expand Up @@ -103,4 +103,15 @@ public Date getMeasuredAt() {
/**
 * Sets the point in time at which this metric was measured.
 *
 * @param measuredAt the measurement timestamp; stored as-is (no copy is made)
 */
public void setMeasuredAt(Date measuredAt) {
    this.measuredAt = measuredAt;
}

/**
 * Creates an empty metric; all fields keep their default values until set.
 */
public VulnerabilityMetrics() {
    // NOTE(review): presumably kept explicit because the persistence layer needs a
    // no-arg constructor now that a parameterized one exists - confirm.
}

/**
 * Creates a fully populated metric.
 *
 * @param year       the year the count applies to
 * @param month      the month the count applies to, or {@code null} for a
 *                   whole-year metric
 * @param count      the number of vulnerabilities counted for that period
 * @param measuredAt the point in time at which the count was taken
 */
public VulnerabilityMetrics(int year, Integer month, int count, Date measuredAt) {
    this.measuredAt = measuredAt;
    this.count = count;
    this.month = month;
    this.year = year;
}


}
Expand Up @@ -18,20 +18,22 @@
*/
package org.dependencytrack.persistence;

import alpine.persistence.PaginatedResult;
import alpine.resources.AlpineRequest;
import org.apache.commons.collections4.CollectionUtils;
import java.util.Date;
import java.util.List;

import javax.jdo.PersistenceManager;
import javax.jdo.Query;

import org.datanucleus.PropertyNames;
import org.dependencytrack.model.Component;
import org.dependencytrack.model.DependencyMetrics;
import org.dependencytrack.model.PortfolioMetrics;
import org.dependencytrack.model.Project;
import org.dependencytrack.model.ProjectMetrics;
import org.dependencytrack.model.VulnerabilityMetrics;

import javax.jdo.PersistenceManager;
import javax.jdo.Query;
import java.util.Date;
import java.util.List;
import alpine.persistence.PaginatedResult;
import alpine.resources.AlpineRequest;

public class MetricsQueryManager extends QueryManager implements IQueryManager {

Expand Down Expand Up @@ -162,34 +164,21 @@ public List<DependencyMetrics> getDependencyMetricsSince(Component component, Da
return (List<DependencyMetrics>)query.execute(component, since);
}

/**
* Synchronizes VulnerabilityMetrics.
*/
public void synchronizeVulnerabilityMetrics(VulnerabilityMetrics metric) {
final Query<VulnerabilityMetrics> query;
final List<VulnerabilityMetrics> result;
if (metric.getMonth() == null) {
query = pm.newQuery(VulnerabilityMetrics.class, "year == :year && month == null");
result = execute(query, metric.getYear()).getList(VulnerabilityMetrics.class);
} else {
query = pm.newQuery(VulnerabilityMetrics.class, "year == :year && month == :month");
result = execute(query, metric.getYear(), metric.getMonth()).getList(VulnerabilityMetrics.class);
}
if (result.size() == 1) {
final VulnerabilityMetrics m = result.get(0);
m.setCount(metric.getCount());
m.setMeasuredAt(metric.getMeasuredAt());
persist(m);
} else if (CollectionUtils.isEmpty(result)) {
persist(metric);
} else {
delete(result);
persist(metric);
}
public void synchronizeVulnerabilityMetrics(List<VulnerabilityMetrics> metrics) {
// No need for complex updating, just replace the existing ~400 rows with new ones
// Unless we have a contract with clients that the ID of metric records cannot change?

pm.setProperty(PropertyNames.PROPERTY_QUERY_JDOQL_ALLOWALL, "true");
valentijnscholten marked this conversation as resolved.
Show resolved Hide resolved
final Query<VulnerabilityMetrics> delete = pm.newQuery("DELETE FROM org.dependencytrack.model.VulnerabilityMetrics");
delete.execute();

// This still does ~400 queries, probably because not all databases can do bulk insert with autogenerated PKs
// Or because Datanucleus is trying to be smart as it wants to cache all these instances
pm.makePersistentAll(metrics);
valentijnscholten marked this conversation as resolved.
Show resolved Hide resolved
}

/**
* Deleted all metrics associated for the specified Project.
* Delete all metrics associated for the specified Project.
* @param project the Project to delete metrics for
*/
void deleteMetrics(Project project) {
Expand All @@ -201,11 +190,12 @@ void deleteMetrics(Project project) {
}

/**
* Deleted all metrics associated for the specified Component.
* Delete all metrics associated for the specified Component.
* @param component the Component to delete metrics for
*/
void deleteMetrics(Component component) {
    // Bulk-delete every DependencyMetrics record whose "component" field matches
    // the given component, without loading the records first.
    pm.newQuery(DependencyMetrics.class, "component == :component")
            .deletePersistentAll(component);
}

}
Expand Up @@ -1057,8 +1057,8 @@ public List<DependencyMetrics> getDependencyMetricsSince(Component component, Da
return getMetricsQueryManager().getDependencyMetricsSince(component, since);
}

public void synchronizeVulnerabilityMetrics(VulnerabilityMetrics metric) {
getMetricsQueryManager().synchronizeVulnerabilityMetrics(metric);
public void synchronizeVulnerabilityMetrics(List<VulnerabilityMetrics> metrics) {
getMetricsQueryManager().synchronizeVulnerabilityMetrics(metrics);
}

void deleteMetrics(Project project) {
Expand Down
Expand Up @@ -18,22 +18,25 @@
*/
package org.dependencytrack.tasks.metrics;

import alpine.common.logging.Logger;
import alpine.event.framework.Event;
import alpine.event.framework.Subscriber;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import javax.jdo.PersistenceManager;
import javax.jdo.Query;

import org.apache.commons.lang3.time.DurationFormatUtils;
import org.dependencytrack.event.VulnerabilityMetricsUpdateEvent;
import org.dependencytrack.model.Vulnerability;
import org.dependencytrack.model.VulnerabilityMetrics;
import org.dependencytrack.persistence.QueryManager;

import javax.jdo.PersistenceManager;
import javax.jdo.Query;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import alpine.common.logging.Logger;
import alpine.event.framework.Event;
import alpine.event.framework.Subscriber;

/**
* A {@link Subscriber} task that updates vulnerability metrics.
Expand All @@ -59,104 +62,72 @@ private void updateMetrics() throws Exception {
LOGGER.info("Executing metrics update on vulnerability database");

final var measuredAt = new Date();
final var yearMonthCounters = new VulnerabilityDateCounters(measuredAt, true);
final var yearCounters = new VulnerabilityDateCounters(measuredAt, false);

try (final var qm = new QueryManager()) {
final PersistenceManager pm = qm.getPersistenceManager();

LOGGER.debug("Fetching first vulnerabilities page");
List<Vulnerability> vulnerabilities = fetchNextVulnerabilitiesPage(pm, null);

while (!vulnerabilities.isEmpty()) {
for (final Vulnerability vulnerability : vulnerabilities) {
if (vulnerability.getCreated() != null) {
yearMonthCounters.updateMetrics(vulnerability.getCreated());
yearCounters.updateMetrics(vulnerability.getCreated());
} else if (vulnerability.getPublished() != null) {
yearMonthCounters.updateMetrics(vulnerability.getPublished());
yearCounters.updateMetrics(vulnerability.getPublished());
}
}

LOGGER.debug("Fetching next vulnerabilities page");
final long lastId = vulnerabilities.get(vulnerabilities.size() - 1).getId();
vulnerabilities = fetchNextVulnerabilitiesPage(pm, lastId);
}

for (final VulnerabilityMetrics metric : yearMonthCounters.getMetrics()) {
qm.synchronizeVulnerabilityMetrics(metric);
}
for (final VulnerabilityMetrics metric : yearCounters.getMetrics()) {
qm.synchronizeVulnerabilityMetrics(metric);
}
}

LOGGER.info("Completed metrics update on vulnerability database in " +
DurationFormatUtils.formatDuration(new Date().getTime() - measuredAt.getTime(), "mm:ss:SS"));
}

/**
* Fetch {@link Vulnerability}s in pages of {@code 500}.
*
* @param pm The {@link PersistenceManager} to use
* @param lastId Highest ID of the previously fetched page
* @return Up to {@code 500} {@link Vulnerability} objects
* @throws Exception If the query could not be closed
*/
private List<Vulnerability> fetchNextVulnerabilitiesPage(final PersistenceManager pm, final Long lastId) throws Exception {
try (final Query<Vulnerability> query = pm.newQuery(Vulnerability.class)) {
if (lastId != null) {
query.setFilter("id < :lastId");
query.setParameters(lastId);
}
query.setOrdering("id DESC");
query.range(0, 500);
query.getFetchPlan().setGroup(Vulnerability.FetchGroup.METRICS_UPDATE.name());
return List.copyOf(query.executeList());
/**
*
* The created field has priority over the published field, which is used as a fallback.
* However, the created field is always empty (in my instances)
* But we leave this mechanism and field juggling in place for backwards compatibility,
* and for (future) analyzers/sources that might provide this field.
*
* BTW the queries to get these vulnerability counts are very fast,
* so strictly speaking there is no reason to create this extra table with metrics
*
*/

// Get metrics by published date but only if created field is null
Collection<YearMonthMetric> published = queryForMetrics(pm, "published", "created");

// Get metrics by created date regardless of value of published field
Collection<YearMonthMetric> created = queryForMetrics(pm, "created", null);

// Merge flat lists
published.addAll(created);

// Collect into nested map so we can sum the counts
Map<Object, Map<Object, Long>> metrics = published.stream()
valentijnscholten marked this conversation as resolved.
Show resolved Hide resolved
.filter(ymm -> ymm.year != null)
.collect(Collectors.groupingBy(ymm -> ymm.year,
Collectors.groupingBy(ymm -> ymm.month, Collectors.summingLong(ymm -> ymm.count))));

// Flatten again, but now into VulnerabilityMetrics that can be persisted
Stream<VulnerabilityMetrics> monthlyStream = metrics.entrySet().stream()
.flatMap(e -> e.getValue().entrySet().stream().flatMap(
v -> Stream.of(new VulnerabilityMetrics((Integer)e.getKey(), (Integer)v.getKey(), v.getValue().intValue(), measuredAt))));

// Flatten another time, for the yearly counts
Stream<VulnerabilityMetrics> yearlyStream = metrics.entrySet().stream()
.flatMap(e -> Stream.of(new VulnerabilityMetrics((Integer)e.getKey(), null, e.getValue().values().stream().mapToInt(d->d.intValue()).sum(), measuredAt)));
valentijnscholten marked this conversation as resolved.
Show resolved Hide resolved

// Get Mutable List to merge again
List<VulnerabilityMetrics> vulnerabilityMetrics = new ArrayList<VulnerabilityMetrics>(monthlyStream.toList());
vulnerabilityMetrics.addAll(new ArrayList<>(yearlyStream.toList()));

qm.synchronizeVulnerabilityMetrics(vulnerabilityMetrics);
valentijnscholten marked this conversation as resolved.
Show resolved Hide resolved

LOGGER.info("Completed metrics update on vulnerability database in " +
DurationFormatUtils.formatDuration(new Date().getTime() - measuredAt.getTime(), "mm:ss:SS"));
nscuro marked this conversation as resolved.
Show resolved Hide resolved
}
}

private static final class VulnerabilityDateCounters {
private final Date measuredAt;
private final boolean trackMonth;
private final List<VulnerabilityMetrics> metrics = new ArrayList<>();
private static Collection<YearMonthMetric> queryForMetrics(PersistenceManager pm, String dateField, String expectedNullField) throws Exception {

private VulnerabilityDateCounters(final Date measuredAt, final boolean trackMonth) {
this.measuredAt = measuredAt;
this.trackMonth = trackMonth;
}
// You cannot (?) parametrize fieldnames in JDOQL, so we use String formatting
String queryTemplate =
"SELECT %s.getYear() as year, %s.getMonth()+1 as month, count(this) as count " +
"FROM org.dependencytrack.model.Vulnerability " +
"WHERE %s != null && %s == null " +
"GROUP BY %s.getYear(), %s.getMonth() + 1";

private void updateMetrics(final Date timestamp) {
final LocalDateTime date = LocalDateTime.ofInstant(timestamp.toInstant(), ZoneId.systemDefault());
final int year = date.getYear();
final int month = date.getMonthValue();

boolean found = false;
for (final VulnerabilityMetrics metric : metrics) {
if (trackMonth && metric.getYear() == year && metric.getMonth() == month) {
metric.setCount(metric.getCount() + 1);
found = true;
} else if (!trackMonth && metric.getYear() == year) {
metric.setCount(metric.getCount() + 1);
found = true;
}
}
if (!found) {
final VulnerabilityMetrics metric = new VulnerabilityMetrics();
metric.setYear(year);
if (trackMonth) {
metric.setMonth(month);
}
metric.setCount(1);
metric.setMeasuredAt(measuredAt);
metrics.add(metric);
}
}
try (Query query = pm.newQuery("javax.jdo.query.JDOQL", String.format(queryTemplate, dateField, dateField, dateField, expectedNullField, dateField, dateField))) {
List<YearMonthMetric> flatMetrics = query.executeResultList(YearMonthMetric.class);

private List<VulnerabilityMetrics> getMetrics() {
return metrics;
}
// the flatMetrics list is bound to the Query, so we need to copy it to a new array to survive query closure
return new ArrayList<YearMonthMetric>(flatMetrics);
}
}

}
@@ -0,0 +1,33 @@
/*
* This file is part of Dependency-Track.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
* Copyright (c) Steve Springett. All Rights Reserved.
*/
package org.dependencytrack.tasks.metrics;

/**
 * Plain value holder for one aggregated row: the number of vulnerabilities
 * counted for a given year and month.
 * <p>
 * Instances are produced as the result class of the grouped JDOQL query in
 * {@code VulnerabilityMetricsUpdateTask}, and the fields are read directly by
 * that task, which is why they are public and mutable.
 */
public class YearMonthMetric {

    /** Year of the aggregated period; consumers filter out {@code null} values. */
    public Integer year;

    /** Month of the aggregated period. */
    public Integer month;

    /** Number of vulnerabilities counted for the year/month combination. */
    public Long count;

    /**
     * Creates a row from its three components.
     *
     * @param year  the year of the aggregated period
     * @param month the month of the aggregated period
     * @param count the number of vulnerabilities counted for that period
     */
    public YearMonthMetric(Integer year, Integer month, Long count) {
        this.count = count;
        this.month = month;
        this.year = year;
    }
}