Skip to content

Commit

Permalink
Gate disk prune behind env var + logging reductions (#4188)
Browse files Browse the repository at this point in the history
* Gate disk prune behind env var + logging reductions

* debug logging

* more debugging

* Bug fix and logs

* final log cleanup
  • Loading branch information
dmanjunath committed Oct 26, 2022
1 parent efed42e commit 4764a06
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 9 deletions.
6 changes: 6 additions & 0 deletions creator-node/src/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -493,6 +493,12 @@ const config = convict({
env: 'premiumContentEnabled',
default: false
},
diskPruneEnabled: {
doc: 'whether DiskManager.sweepSubdirectoriesInFiles() should run',
format: Boolean,
env: 'diskPruneEnabled',
default: true
},

/** sync / snapback configs */

Expand Down
31 changes: 23 additions & 8 deletions creator-node/src/diskManager.js
Original file line number Diff line number Diff line change
Expand Up @@ -431,39 +431,51 @@ class DiskManager {
for (let i = 0; i < subdirectories.length; i += 1) {
try {
const subdirectory = subdirectories[i]
genericLogger.info(
`diskManager#sweepSubdirectoriesInFiles - iteration ${i} out of ${subdirectories.length}`
)

const cidsToFilePathMap = await this.listNestedCIDsInFilePath(
subdirectory
)
const cidsInSubdirectory = Object.keys(cidsToFilePathMap)

const queryResults = await models.File.findAll({
attributes: ['multihash', 'storagePath'],
raw: true,
where: {
multihash: {
[models.Sequelize.Op.in]: Object.keys(cidsToFilePathMap)
[models.Sequelize.Op.in]: cidsInSubdirectory
}
}
})

genericLogger.debug(
`diskManager#sweepSubdirectoriesInFiles - iteration ${i} out of ${
subdirectories.length
}. subdirectory: ${subdirectory}. got ${
Object.keys(cidsToFilePathMap).length
} files in folder and ${
queryResults.length
} results from db. files: ${Object.keys(
cidsToFilePathMap
).toString()}. db records: ${JSON.stringify(queryResults)}`
)

const cidsInDB = new Set()
for (const file of queryResults) {
cidsInDB.add(file.multihash)
}

const cidsToDelete = []
const cidsNotToDelete = []
for (const cid of cidsInDB) {
// iterate through all files on disk and check if db contains it
for (const cid of cidsInSubdirectory) {
// if db doesn't contain file, log as okay to delete
if (!cidsInDB.has(cid)) {
cidsToDelete.push(cid)
} else cidsNotToDelete.push(cid)
}

if (cidsNotToDelete.length > 0) {
genericLogger.info(
genericLogger.debug(
`diskmanager.js - not safe to delete ${cidsNotToDelete.toString()}`
)
}
Expand Down Expand Up @@ -493,8 +505,11 @@ class DiskManager {
if (redoJob) return this.sweepSubdirectoriesInFiles()
}

static async _execShellCommand(cmd) {
genericLogger.info(`diskManager - about to call _execShellCommand: ${cmd}`)
static async _execShellCommand(cmd, log = false) {
if (log)
genericLogger.info(
`diskManager - about to call _execShellCommand: ${cmd}`
)
const { stdout, stderr } = await exec(`${cmd}`, {
maxBuffer: 1024 * 1024 * 5
}) // 5mb buffer
Expand Down
7 changes: 6 additions & 1 deletion creator-node/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,12 @@ const startAppForPrimary = async () => {
})

// do not await this, this should just run in background for now
DiskManager.sweepSubdirectoriesInFiles()
// wait one minute before starting this because it might cause init to degrade
if (config.get('diskPruneEnabled')) {
setTimeout(() => {
DiskManager.sweepSubdirectoriesInFiles()
}, 60_000)
}
}

// Workers don't share memory, so each one is its own Express instance with its own version of objects like serviceRegistry
Expand Down

0 comments on commit 4764a06

Please sign in to comment.