Skip to content

Commit

Permalink
Delete features for organism in smaller batches (#1877)
Browse files Browse the repository at this point in the history
* added get all track services method

* added a loop for max of 10K

* removed yucky script

* removed unused file
  • Loading branch information
nathandunn committed Apr 16, 2018
1 parent c72430b commit 01a7c1c
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 41 deletions.
95 changes: 54 additions & 41 deletions grails-app/services/org/bbop/apollo/OrganismService.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -18,56 +18,69 @@ class OrganismService {
def featureService
def configWrapperService

int MAX_DELETE_SIZE = 10000
int TRANSACTION_SIZE = 30

@NotTransactional
deleteAllFeaturesForOrganism(Organism organism) {
def deleteAllFeaturesForOrganism(Organism organism) {

def featurePairs = Feature.executeQuery("select f.id,f.uniqueName from Feature f join f.featureLocations fl join fl.sequence s join s.organism o where o=:organism", [organism: organism])
// maximum transaction size 30
log.debug "feature sublists created ${featurePairs.size()}"
def featureSubLists = featurePairs.collate(TRANSACTION_SIZE)
if (!featureSubLists) {
log.warn("Nothing to delete for ${organism?.commonName}")
return
}
log.debug "sublists size ${featureSubLists.size()}"
int count = 0
long startTime = System.currentTimeMillis()
long endTime
double totalTime
featureSubLists.each { featureList ->
if (featureList) {
def ids = featureList.collect() {
it[0]
}
log.info"ids ${ids.size()}"
def uniqueNames = featureList.collect() {
it[1]
}
log.debug "uniqueNames ${uniqueNames.size()}"
Feature.withNewTransaction{
def features = Feature.findAllByIdInList(ids)
features.each { f ->
f.delete()

int totalDeleted = 0
println "organism ${organism}"
def featureCount = Feature.executeQuery("select count(f) from Feature f join f.featureLocations fl join fl.sequence s join s.organism o where o=:organism", [organism: organism])[0]
println "features to delete ${featureCount}"
while(featureCount>0){
def featurePairs = Feature.executeQuery("select f.id,f.uniqueName from Feature f join f.featureLocations fl join fl.sequence s join s.organism o where o=:organism", [max:MAX_DELETE_SIZE,organism: organism])
// maximum transaction size 30
log.debug "feature sublists created ${featurePairs.size()}"
def featureSubLists = featurePairs.collate(TRANSACTION_SIZE)
if (!featureSubLists) {
log.warn("Nothing to delete for ${organism?.commonName}")
return
}
log.debug "sublists size ${featureSubLists.size()}"
int count = 0
long startTime = System.currentTimeMillis()
long endTime
double totalTime
featureSubLists.each { featureList ->
if (featureList) {
def ids = featureList.collect() {
it[0]
}
log.info"ids ${ids.size()}"
def uniqueNames = featureList.collect() {
it[1]
}
def featureEvents = FeatureEvent.findAllByUniqueNameInList(uniqueNames)
featureEvents.each { fe ->
fe.delete()
log.debug "uniqueNames ${uniqueNames.size()}"
Feature.withNewTransaction{
def features = Feature.findAllByIdInList(ids)
features.each { f ->
f.delete()
}
def featureEvents = FeatureEvent.findAllByUniqueNameInList(uniqueNames)
featureEvents.each { fe ->
fe.delete()
}
organism.save(flush: true)
count += featureList.size()
log.info "${count} / ${featurePairs.size()} = ${100 * count / featurePairs.size()}% "
}
organism.save(flush: true)
count += featureList.size()
log.info "${count} / ${featurePairs.size()} = ${100 * count / featurePairs.size()}% "
log.info "deleted ${featurePairs.size()}"
}
log.info "deleted ${featurePairs.size()}"
endTime = System.currentTimeMillis()
totalTime = (endTime - startTime) / 1000.0f
startTime = System.currentTimeMillis()
double rate = featureList.size() / totalTime
log.info "Deleted ${rate} features / sec"
}
endTime = System.currentTimeMillis()
totalTime = (endTime - startTime) / 1000.0f
startTime = System.currentTimeMillis()
double rate = featureList.size() / totalTime
log.info "Deleted ${rate} features / sec"
totalDeleted += featurePairs.size()

featureCount = Feature.executeQuery("select count(f) from Feature f join f.featureLocations fl join fl.sequence s join s.organism o where o=:organism", [organism: organism])[0]
println "features remaining to delete ${featureCount} vs deleted ${totalDeleted}"
}
return featurePairs.size()
return totalDeleted

}


Expand Down
File renamed without changes.
File renamed without changes.

0 comments on commit 01a7c1c

Please sign in to comment.