Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lower Pidfile stale_age multiplier. Add pidfile to cache log message. #51714

Merged
merged 7 commits into from
Oct 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 11 additions & 8 deletions base/loading.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3053,24 +3053,27 @@ global parse_pidfile_hook
# the same package cannot be precompiled from different projects and/or different preferences at the same time.
compilecache_pidfile_path(pkg::PkgId) = compilecache_path(pkg, UInt64(0); project="") * ".pidfile"

const compilecache_pidlock_stale_age = 10

# Allows processes to wait if another process is precompiling a given source already.
# The lock file mtime will be updated when held every `stale_age/2` seconds.
# The lock file mtime will be updated when held at most every `stale_age/2` seconds, with expected
# variance of 10 seconds or more being infrequent but not unusual.
# After `stale_age` seconds beyond the mtime of the lock file, the lock file is deleted and
# precompilation will proceed if
# - the locking process no longer exists
# - the lock is held by another host, since processes cannot be checked remotely
# or after `stale_age * 25` seconds if the process does still exist.
function maybe_cachefile_lock(f, pkg::PkgId, srcpath::String; stale_age=10)
# precompilation will proceed if the locking process no longer exists or after `stale_age * 5`
# seconds if the process does still exist.
# If the lock is held by another host, it will conservatively wait `stale_age * 5`
# seconds since processes cannot be checked remotely
function maybe_cachefile_lock(f, pkg::PkgId, srcpath::String; stale_age=compilecache_pidlock_stale_age)
if @isdefined(mkpidlock_hook) && @isdefined(trymkpidlock_hook) && @isdefined(parse_pidfile_hook)
pidfile = compilecache_pidfile_path(pkg)
cachefile = invokelatest(trymkpidlock_hook, f, pidfile; stale_age)
if cachefile === false
pid, hostname, age = invokelatest(parse_pidfile_hook, pidfile)
verbosity = isinteractive() ? CoreLogging.Info : CoreLogging.Debug
if isempty(hostname) || hostname == gethostname()
@logmsg verbosity "Waiting for another process (pid: $pid) to finish precompiling $pkg"
@logmsg verbosity "Waiting for another process (pid: $pid) to finish precompiling $pkg. Pidfile: $pidfile"
else
@logmsg verbosity "Waiting for another machine (hostname: $hostname, pid: $pid) to finish precompiling $pkg"
@logmsg verbosity "Waiting for another machine (hostname: $hostname, pid: $pid) to finish precompiling $pkg. Pidfile: $pidfile"
end
# wait until the lock is available, but don't actually acquire it
# returning nothing indicates a process waited for another
Expand Down
21 changes: 12 additions & 9 deletions stdlib/FileWatching/src/pidfile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ Optional keyword arguments:
- `mode`: file access mode (modified by the process umask). Defaults to world-readable.
- `poll_interval`: Specify the maximum time to between attempts (if `watch_file` doesn't work)
- `stale_age`: Delete an existing pidfile (ignoring the lock) if it is older than this many seconds, based on its mtime.
The file won't be deleted until 25x longer than this if the pid in the file appears that it may be valid.
The file won't be deleted until 5x longer than this if the pid in the file appears that it may be valid.
IanButterworth marked this conversation as resolved.
Show resolved Hide resolved
Or 25x longer if `refresh` is overridden to 0 to disable lock refreshing.
By default this is disabled (`stale_age` = 0), but a typical recommended value would be about 3-5x an
estimated normal completion time.
- `refresh`: Keeps a lock from becoming stale by updating the mtime every interval of time that passes.
Expand Down Expand Up @@ -64,7 +65,7 @@ mutable struct LockMonitor
atdir, atname = splitdir(at)
isempty(atdir) && (atdir = pwd())
at = realpath(atdir) * path_separator * atname
fd = open_exclusive(at; stale_age=stale_age, kwopts...)
fd = open_exclusive(at; stale_age, refresh, kwopts...)
update = nothing
try
write_pidfile(fd, pid)
Expand Down Expand Up @@ -185,15 +186,16 @@ function isvalidpid(hostname::AbstractString, pid::Cuint)
end

"""
stale_pidfile(path::String, stale_age::Real) :: Bool
stale_pidfile(path::String, stale_age::Real, refresh::Real) :: Bool

Helper function for `open_exclusive` for deciding if a pidfile is stale.
"""
function stale_pidfile(path::String, stale_age::Real)
function stale_pidfile(path::String, stale_age::Real, refresh::Real)
pid, hostname, age = parse_pidfile(path)
age < -stale_age && @warn "filesystem time skew detected" path=path
longer_factor = refresh == 0 ? 25 : 5
if age > stale_age
if (age > stale_age * 25) || !isvalidpid(hostname, pid)
if (age > stale_age * longer_factor) || !isvalidpid(hostname, pid)
return true
end
end
Expand All @@ -220,7 +222,7 @@ struct PidlockedError <: Exception
end

"""
open_exclusive(path::String; mode, poll_interval, wait, stale_age) :: File
open_exclusive(path::String; mode, poll_interval, wait, stale_age, refresh) :: File

Create a new a file for read-write advisory-exclusive access.
If `wait` is `false` then error out if the lock files exist
Expand All @@ -232,13 +234,14 @@ function open_exclusive(path::String;
mode::Integer = 0o444 #= read-only =#,
poll_interval::Real = 10 #= seconds =#,
wait::Bool = true #= return on failure if false =#,
stale_age::Real = 0 #= disabled =#)
stale_age::Real = 0 #= disabled =#,
refresh::Real = stale_age/2)
# fast-path: just try to open it
file = tryopen_exclusive(path, mode)
file === nothing || return file
if !wait
if file === nothing && stale_age > 0
if stale_age > 0 && stale_pidfile(path, stale_age)
if stale_age > 0 && stale_pidfile(path, stale_age, refresh)
@warn "attempting to remove probably stale pidfile" path=path
tryrmopenfile(path)
end
Expand All @@ -264,7 +267,7 @@ function open_exclusive(path::String;
file = tryopen_exclusive(path, mode)
file === nothing || return file
Base.wait(t) # sleep for a bit before trying again
if stale_age > 0 && stale_pidfile(path, stale_age)
if stale_age > 0 && stale_pidfile(path, stale_age, refresh)
# if the file seems stale, try to remove it before attempting again
# set stale_age to zero so we won't attempt again, even if the attempt fails
stale_age -= stale_age
Expand Down
35 changes: 25 additions & 10 deletions stdlib/FileWatching/test/pidfile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -203,18 +203,33 @@ end

@assert !ispath("pidfile")
@testset "open_exclusive: break lock" begin
# test for stale_age
t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=10)::File
try
write_pidfile(f, getpid())
finally
@testset "using stale_age without lock refreshing" begin
t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=10, refresh=0)::File
try
write_pidfile(f, getpid())
finally
close(f)
end
@test t < 2
t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=1, refresh=0)::File
close(f)
@test 20 < t < 50
rm("pidfile")
end

@testset "using stale_age with lock refreshing on (default)" begin
t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=10)::File
try
write_pidfile(f, getpid())
finally
close(f)
end
@test t < 2
t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=5)::File
close(f)
@test 20 < t < 50
rm("pidfile")
end
@test t < 2
t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=1)::File
close(f)
@test 20 < t < 50
rm("pidfile")

t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=10)::File
close(f)
Expand Down