Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 28 additions & 8 deletions sandbox/sandbox.go
Original file line number Diff line number Diff line change
Expand Up @@ -210,13 +210,11 @@ func checkDirectory(sandboxDef SandboxDef) (SandboxDef, error) {
if err != nil {
return sandboxDef, fmt.Errorf(globals.ErrWhileStoppingSandbox, sandboxDir)
}
_, err = common.RunCmdWithArgs("rm", []string{"-rf", sandboxDir})
if err != nil {
if err = removeDirWithRetry(sandboxDir); err != nil {
return sandboxDef, fmt.Errorf(globals.ErrWhileDeletingSandbox, sandboxDir)
}
if logDirectory != "" {
_, err = common.RunCmdWithArgs("rm", []string{"-rf", logDirectory})
if err != nil {
if err = removeDirWithRetry(logDirectory); err != nil {
return sandboxDef, fmt.Errorf("error while deleting log directory %s", logDirectory)
}
}
Expand Down Expand Up @@ -1183,6 +1181,30 @@ func getLogDirFromSbDescription(fullPath string) (string, error) {
return logDirectory, nil
}

// removeDirWithRetry runs `rm -rf <target>` with a short exponential backoff
// retry loop. After the stop/send_kill script returns, the killed mysqld
// process may not yet have finished releasing its open file descriptors —
// during that brief window, last buffered writes can land in master/data/
// after rm's readdir() walk, making the final rmdir(2) return ENOTEMPTY
// ("Directory not empty"). Retrying handles that race (issue #121).
// Total max wait: 200+400+800+1600+3200ms ≈ 6.2s.
func removeDirWithRetry(target string) error {
backoff := 200 * time.Millisecond
var lastErr error
for attempt := 0; attempt < 5; attempt++ {
if attempt > 0 {
time.Sleep(backoff)
backoff *= 2
}
if _, err := common.RunCmdWithArgs("rm", []string{"-rf", target}); err == nil {
return nil
} else {
lastErr = err
}
}
return lastErr
}
Comment on lines +1191 to +1206
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Using os.RemoveAll is more idiomatic and efficient in Go than spawning an external rm process, as it avoids the overhead of process creation and is more portable. Additionally, the loop logic can be simplified by removing the redundant else block. Note that the total sleep time with 5 attempts is actually 3 seconds (200+400+800+1600ms), which differs from the 6.2s mentioned in the docstring.

func removeDirWithRetry(target string) error {
	backoff := 200 * time.Millisecond
	var err error
	for attempt := 0; attempt < 5; attempt++ {
		if attempt > 0 {
			time.Sleep(backoff)
			backoff *= 2
		}
		if err = os.RemoveAll(target); err == nil {
			return nil
		}
	}
	return err
}


// Deprecated: use RemoveCustomSandbox instead
func RemoveSandbox(sandboxDir, sandbox string, runConcurrently bool) (execList []concurrent.ExecutionList, err error) {
fullPath := path.Join(sandboxDir, sandbox)
Expand Down Expand Up @@ -1273,8 +1295,7 @@ func RemoveSandbox(sandboxDir, sandbox string, runConcurrently bool) (execList [
if globals.UsingDbDeployer && target != logDirectory {
common.CondPrintf("Running %s\n", cmdStr)
}
_, err = common.RunCmdWithArgs("rm", rmArgs)
if err != nil {
if err = removeDirWithRetry(target); err != nil {
return emptyExecutionList, fmt.Errorf(globals.ErrWhileDeletingSandbox, target)
}
if globals.UsingDbDeployer && target != logDirectory {
Expand Down Expand Up @@ -1394,8 +1415,7 @@ func RemoveCustomSandbox(sandboxDir, sandbox string, runConcurrently, useStop bo
if globals.UsingDbDeployer && target != logDirectory {
common.CondPrintf("Running %s\n", cmdStr)
}
_, err = common.RunCmdWithArgs("rm", rmArgs)
if err != nil {
if err = removeDirWithRetry(target); err != nil {
return emptyExecutionList, fmt.Errorf(globals.ErrWhileDeletingSandbox, target)
}
if globals.UsingDbDeployer && target != logDirectory {
Expand Down
Loading