Skip to content

Commit

Permalink
Improve command cleanup process by removing problematic predicate and…
Browse files Browse the repository at this point in the history
… re-ordering operation (#1117)

The job created threshold within the subquery for finding commands to deactivate could flip into a full table scan if the number of jobs matching the predicate passed a certain threshold.

This change removes that predicate to ensure the query uses nothing but indices. With the command creation threshold long enough and no job in database using it this should be relatively safe.

To be double safe the command deletion has been moved before the command deactivation so that the time between database cleanup invocations (24 hours for example) will be available for any
problems to surface and a command to be reactivated if for some reason someone needs it still. Unlikely if someone hasn't used it in the last X days anyway.
  • Loading branch information
tgianos committed Sep 23, 2021
1 parent 3a6ef8f commit ff98e69
Show file tree
Hide file tree
Showing 9 changed files with 18 additions and 126 deletions.
8 changes: 1 addition & 7 deletions genie-docs/src/docs/asciidoc/_properties.adoc
Expand Up @@ -671,13 +671,7 @@ of 2.0
|genie.tasks.database-cleanup.command-deactivation.commandCreationThreshold
|The number of days before the current cleanup run that a command must have been created before in the system to be
considered for deactivation.
|false
|yes

|genie.tasks.database-cleanup.command-deactivation.jobCreationThreshold
|The number of days before the current cleanup run that command must not have been used in a job for that command to be
considered for deactivation.
|false
|60
|yes

|genie.tasks.database-cleanup.command-deactivation.skip
Expand Down
Expand Up @@ -946,7 +946,6 @@ void testFindCommandsMatchingCriterion() throws Exception {
)
void testUpdateStatusForUnusedCommands() {
final Instant present = Instant.parse("2020-03-24T00:00:00.000Z");
final Instant jobThreshold = present.minus(30, ChronoUnit.DAYS);
final Instant commandThreshold = present.minus(60, ChronoUnit.DAYS);
final int batchSize = 100;
Assertions
Expand All @@ -955,7 +954,6 @@ void testUpdateStatusForUnusedCommands() {
CommandStatus.INACTIVE,
commandThreshold,
EnumSet.of(CommandStatus.ACTIVE, CommandStatus.DEPRECATED),
jobThreshold,
batchSize
)
)
Expand Down
Expand Up @@ -492,23 +492,19 @@ void addClusterCriterionForCommand(

/**
* Update the status of a command to the {@literal desiredStatus} if its status is in {@literal currentStatuses},
* it was created before {@literal commandCreatedThreshold} and it hasn't been used in any job that was created
* in the Genie system after {@literal jobCreatedThreshold}.
* it was created before {@literal commandCreatedThreshold} and it hasn't been used in any job.
*
* @param desiredStatus The new status the matching commands should have
* @param commandCreatedThreshold The instant in time which a command must have been created before to be
* considered for update. Exclusive
* @param currentStatuses The set of current statuses a command must have to be considered for update
* @param jobCreatedThreshold The instant in time after which a command must not have been used in a Genie job
* for it to be considered for update. Inclusive.
* @param batchSize The maximum number of commands to update in a single transaction
* @return The number of commands whose statuses were updated to {@literal desiredStatus}
*/
int updateStatusForUnusedCommands(
CommandStatus desiredStatus,
Instant commandCreatedThreshold,
Set<CommandStatus> currentStatuses,
Instant jobCreatedThreshold,
int batchSize
);

Expand Down
Expand Up @@ -1257,34 +1257,30 @@ public int updateStatusForUnusedCommands(
final CommandStatus desiredStatus,
final Instant commandCreatedThreshold,
final Set<CommandStatus> currentStatuses,
final Instant jobCreatedThreshold,
final int batchSize
) {
log.info(
"Attempting to update at most {} commands with statuses {} "
+ "which were created before {} and haven't been used in jobs created after {} to new status {}",
+ "which were created before {} and haven't been used in jobs to new status {}",
batchSize,
currentStatuses,
commandCreatedThreshold,
jobCreatedThreshold,
desiredStatus
);
final int updateCount = this.commandRepository.setUnusedStatus(
final int updateCount = this.commandRepository.setStatusWhereIdIn(
desiredStatus.name(),
this.commandRepository.findOldCommands(
commandCreatedThreshold,
this.commandRepository.findUnusedCommandsByStatusesCreatedBefore(
currentStatuses.stream().map(Enum::name).collect(Collectors.toSet()),
jobCreatedThreshold,
commandCreatedThreshold,
batchSize
)
);
log.info(
"Updated {} commands with statuses {} "
+ "which were created before {} and haven't been used in jobs created after {} to new status {}",
+ "which were created before {} and haven't been used in any jobs to new status {}",
updateCount,
currentStatuses,
commandCreatedThreshold,
jobCreatedThreshold,
desiredStatus
);
return updateCount;
Expand All @@ -1306,7 +1302,7 @@ public long deleteUnusedCommands(
commandCreatedThreshold
);
return this.commandRepository.deleteByIdIn(
this.commandRepository.findUnusedCommands(
this.commandRepository.findUnusedCommandsByStatusesCreatedBefore(
deleteStatuses.stream().map(Enum::name).collect(Collectors.toSet()),
commandCreatedThreshold,
batchSize
Expand Down
Expand Up @@ -33,44 +33,12 @@
public interface JpaCommandRepository extends JpaBaseRepository<CommandEntity> {

/**
* The query used to set commands to a given status given input parameters.
* The query used to find commands that are in a certain status, not used in jobs and created some time ago.
*/
String SET_UNUSED_STATUS_QUERY =
"UPDATE commands"
+ " SET status = :desiredStatus"
+ " WHERE status IN (:currentStatuses)"
+ " AND created < :commandCreatedThreshold"
+ " AND id NOT IN ("
+ "SELECT DISTINCT(command_id)"
+ " FROM jobs"
+ " WHERE command_id IS NOT NULL"
+ " AND created >= :jobCreatedThreshold"
+ ")";

/**
* The query to find the id's of commands that should be marked inactive due to not being used.
*/
// NOTE TO FUTURE SELF: JPQL does not support limit and wants you to use page size etc. Not worth hassle.
String FIND_OLD_COMMANDS_QUERY =
"SELECT id"
+ " FROM commands"
+ " WHERE status IN (:currentStatuses)"
+ " AND created < :commandCreatedThreshold"
+ " AND id NOT IN ("
+ "SELECT DISTINCT(command_id)"
+ " FROM jobs"
+ " WHERE command_id IS NOT NULL"
+ " AND created >= :jobCreatedThreshold"
+ ")"
+ " LIMIT :limit";

/**
* The query used to find commands that are unused to delete.
*/
String FIND_UNUSED_COMMANDS_QUERY =
String FIND_UNUSED_COMMANDS_IN_STATUS_CREATED_BEFORE_QUERY =
"SELECT id"
+ " FROM commands"
+ " WHERE status IN (:unusedStatuses)"
+ " WHERE status IN (:statuses)"
+ " AND created < :commandCreatedThreshold"
+ " AND id NOT IN ("
+ "SELECT DISTINCT(command_id)"
Expand All @@ -79,27 +47,6 @@ public interface JpaCommandRepository extends JpaBaseRepository<CommandEntity> {
+ ")"
+ " LIMIT :limit";


/**
* Find any commands that aren't currently attached to a job newer than a certain threshold and the command itself
* was created before a given time.
*
* @param commandCreatedThreshold The instant in time which a command must have been created before to be
* considered for update. Exclusive
* @param currentStatuses The set of current statuses a command must have to be considered for update
* @param jobCreatedThreshold The instant in time after which a command must not have been used in a Genie job
* for it to be considered for update. Inclusive.
* @param limit The maximum number of commands to retrieve
* @return The ids of the commands that matched the predicate
*/
@Query(value = FIND_OLD_COMMANDS_QUERY, nativeQuery = true)
Set<Long> findOldCommands(
@Param("commandCreatedThreshold") Instant commandCreatedThreshold,
@Param("currentStatuses") Set<String> currentStatuses,
@Param("jobCreatedThreshold") Instant jobCreatedThreshold,
@Param("limit") int limit
);

/**
* Bulk set the status of commands which match the given inputs.
*
Expand All @@ -109,24 +56,24 @@ Set<Long> findOldCommands(
*/
@Query(value = "UPDATE CommandEntity c SET c.status = :desiredStatus WHERE c.id IN (:commandIds)")
@Modifying
int setUnusedStatus(
int setStatusWhereIdIn(
@Param("desiredStatus") String desiredStatus,
@Param("commandIds") Set<Long> commandIds
);

/**
* Find commands from the database where their status is in {@literal deleteStatuses} they were created
* Find commands from the database where their status is in {@literal statuses} and they were created
* before {@literal commandCreatedThreshold} and they aren't attached to any jobs still in the database.
*
* @param unusedStatuses The set of statuses a command must be in in order to be considered unused
* @param statuses The set of statuses a command must be in for it to be considered unused
* @param commandCreatedThreshold The instant in time a command must have been created before to be considered
* unused. Exclusive.
* @param limit Maximum number of IDs to return
* @return The ids of the commands that are considered unused
*/
@Query(value = FIND_UNUSED_COMMANDS_QUERY, nativeQuery = true)
Set<Long> findUnusedCommands(
@Param("unusedStatuses") Set<String> unusedStatuses,
@Query(value = FIND_UNUSED_COMMANDS_IN_STATUS_CREATED_BEFORE_QUERY, nativeQuery = true)
Set<Long> findUnusedCommandsByStatusesCreatedBefore(
@Param("statuses") Set<String> statuses,
@Param("commandCreatedThreshold") Instant commandCreatedThreshold,
@Param("limit") int limit
);
Expand Down
Expand Up @@ -225,13 +225,6 @@ public static class CommandDeactivationDatabaseCleanupProperties {
public static final String COMMAND_CREATION_THRESHOLD_PROPERTY
= COMMAND_DEACTIVATION_PROPERTY_PREFIX + ".commandCreationThreshold";

/**
* The number of days before the current cleanup run that command must not have been used in a job for that
* command to be considered for deactivation.
*/
public static final String JOB_CREATION_THRESHOLD_PROPERTY
= COMMAND_DEACTIVATION_PROPERTY_PREFIX + ".jobCreationThreshold";

/**
* Skip deactivating commands when performing database cleanup.
*/
Expand All @@ -243,13 +236,6 @@ public static class CommandDeactivationDatabaseCleanupProperties {
*/
@Min(1)
private int commandCreationThreshold = 60;

/**
* The number of days before the current cleanup run that command must not have been used in a job for that
* command to be considered for deactivation.
*/
@Min(1)
private int jobCreationThreshold = 30;
}

/**
Expand Down
Expand Up @@ -180,8 +180,8 @@ public void run() {
final Instant creationThreshold = runtime.minus(1L, ChronoUnit.HOURS);

this.deleteClusters(creationThreshold);
this.deactivateCommands(runtime);
this.deleteCommands(creationThreshold);
this.deactivateCommands(runtime);
this.deleteApplications(creationThreshold);
this.deleteFiles(creationThreshold);
this.deleteTags(creationThreshold);
Expand Down Expand Up @@ -445,16 +445,6 @@ private void deactivateCommands(final Instant runtime) {
),
ChronoUnit.DAYS
);
final Instant jobCreationThreshold = runtime.minus(
this.environment.getProperty(
DatabaseCleanupProperties
.CommandDeactivationDatabaseCleanupProperties
.JOB_CREATION_THRESHOLD_PROPERTY,
Integer.class,
this.cleanupProperties.getCommandDeactivation().getJobCreationThreshold()
),
ChronoUnit.DAYS
);
log.info(
"Attempting to set commands to status {} that were previously in one of {} in batches of {}",
CommandStatus.INACTIVE,
Expand All @@ -468,7 +458,6 @@ private void deactivateCommands(final Instant runtime) {
CommandStatus.INACTIVE,
commandCreationThreshold,
TO_DEACTIVATE_COMMAND_STATUSES,
jobCreationThreshold,
batchSize
);
totalDeactivatedCommands += batchedDeactivated;
Expand Down
Expand Up @@ -47,7 +47,6 @@ void canGetDefaultValues() {
Assertions.assertThat(this.properties.getCommandCleanup().isSkip()).isFalse();
Assertions.assertThat(this.properties.getCommandDeactivation().isSkip()).isFalse();
Assertions.assertThat(this.properties.getCommandDeactivation().getCommandCreationThreshold()).isEqualTo(60);
Assertions.assertThat(this.properties.getCommandDeactivation().getJobCreationThreshold()).isEqualTo(30);
Assertions.assertThat(this.properties.getJobCleanup().isSkip()).isFalse();
Assertions.assertThat(this.properties.getJobCleanup().getRetention()).isEqualTo(90);
Assertions.assertThat(this.properties.getJobCleanup().getMaxDeletedPerTransaction()).isEqualTo(1000);
Expand Down Expand Up @@ -149,13 +148,4 @@ void canSetCommandDeactivationCommandCreationThreshold() {
.assertThat(this.properties.getCommandDeactivation().getCommandCreationThreshold())
.isEqualTo(newThreshold);
}

@Test
void canSetCommandDeactivationJobCreationThreshold() {
final int newThreshold = this.properties.getCommandDeactivation().getJobCreationThreshold() + 1;
this.properties.getCommandDeactivation().setJobCreationThreshold(newThreshold);
Assertions
.assertThat(this.properties.getCommandDeactivation().getJobCreationThreshold())
.isEqualTo(newThreshold);
}
}
Expand Up @@ -132,7 +132,6 @@ void canRun() {
Mockito.when(this.jobCleanupProperties.getRetention()).thenReturn(days).thenReturn(negativeDays);
Mockito.when(this.jobCleanupProperties.getPageSize()).thenReturn(pageSize);
Mockito.when(this.commandDeactivationProperties.getCommandCreationThreshold()).thenReturn(60);
Mockito.when(this.commandDeactivationProperties.getJobCreationThreshold()).thenReturn(30);

final ArgumentCaptor<Instant> argument = ArgumentCaptor.forClass(Instant.class);

Expand Down Expand Up @@ -176,7 +175,6 @@ void canRun() {
Mockito.eq(CommandStatus.INACTIVE),
Mockito.any(Instant.class),
Mockito.eq(EnumSet.of(CommandStatus.DEPRECATED, CommandStatus.ACTIVE)),
Mockito.any(Instant.class),
Mockito.anyInt()
)
)
Expand Down Expand Up @@ -240,7 +238,6 @@ void canRun() {
Mockito.eq(CommandStatus.INACTIVE),
Mockito.any(Instant.class),
Mockito.eq(EnumSet.of(CommandStatus.DEPRECATED, CommandStatus.ACTIVE)),
Mockito.any(Instant.class),
Mockito.anyInt()
);
}
Expand Down Expand Up @@ -313,7 +310,6 @@ void skipAll() {
Mockito.any(CommandStatus.class),
Mockito.any(Instant.class),
Mockito.anySet(),
Mockito.any(Instant.class),
Mockito.anyInt()
);
Mockito
Expand Down

0 comments on commit ff98e69

Please sign in to comment.