From e708900f7c20bbc18cea2696f7d70694aa8ce633 Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Sun, 28 Sep 2025 00:46:06 +0200 Subject: [PATCH 01/15] Respect rate limit values in response headers Co-Authored-By: Claude --- src/utils/requests.jl | 245 ++++++++++++++++++++++-- test/retries.jl | 419 ++++++++++++++++++++++++++++++++++++++++++ test/runtests.jl | 1 + 3 files changed, 648 insertions(+), 17 deletions(-) create mode 100644 test/retries.jl diff --git a/src/utils/requests.jl b/src/utils/requests.jl index 02c33f5..6c3923a 100644 --- a/src/utils/requests.jl +++ b/src/utils/requests.jl @@ -58,28 +58,230 @@ api_uri(api::GitHubAPI, path) = error("URI retrieval not implemented for this AP # GitHub REST Methods # ####################### -function github_request(api::GitHubAPI, request_method, endpoint; +function safe_tryparse(args...) + try + return tryparse(args...) + catch + nothing + end +end + +""" + github_retry_decision(method::String, resp::Union{HTTP.Response, Nothing}, ex::Union{Exception, Nothing}, exponential_delay::Float64; verbose::Bool=true) -> (should_retry::Bool, sleep_seconds::Float64) + +Analyzes a GitHub API response/exception to determine if a request should be retried and how long to wait. +Uses HTTP.jl's retry logic as a foundation, then adds GitHub-specific rate limiting handling. +This function does NOT perform any sleeping - it only returns the decision and timing information. +Logs retry decisions with detailed rate limit information when retries occur (if verbose=true). + +# Arguments +- `method`: HTTP method string (e.g., "GET", "POST") +- `resp`: HTTP response object (if a response was received), or `nothing` +- `ex`: Exception that occurred (if any), or `nothing` +- `exponential_delay`: The delay from ExponentialBackOff iterator +- `verbose`: Whether to log retry decisions (default: true) + +# Returns +A tuple `(should_retry, sleep_seconds)` where: +- `should_retry`: `true` if the request should be retried, `false` otherwise +- `sleep_seconds`: Number of seconds to sleep before retry (0 if no sleep needed) + +# Retry Logic +1. First uses HTTP.jl's standard retry logic (`isrecoverable` + `isidempotent`) +2. Then adds GitHub-specific rate limiting: + - **Primary rate limit**: `x-ratelimit-remaining: 0` → wait until `x-ratelimit-reset` time + - **Secondary rate limit**: Has `retry-after` header OR error message indicates secondary → + - If `retry-after` present: use that delay + - If `x-ratelimit-remaining: 0`: wait until reset time + - Otherwise: wait at least 1 minute, then use exponential backoff +""" +function github_retry_decision(method::String, resp::Union{HTTP.Response, Nothing}, ex::Union{Exception, Nothing}, exponential_delay::Float64; verbose::Bool=true) + # If we have a response, process it first (takes precedence over exceptions) + if resp !== nothing + status = resp.status + + # Don't retry successful responses + if status < 400 + return (false, 0.0) + end + else + # No response - check if we have a recoverable exception + if ex !== nothing + # If there's an exception, check if it's recoverable and if the method is idempotent + if HTTP.RetryRequest.isrecoverable(ex) && HTTP.RetryRequest.isidempotent(method) + verbose && @info "GitHub API exception, retrying in $(round(exponential_delay, digits=1))s" method=method exception=ex + return (true, exponential_delay) + end + end + # No response and no retryable exception + return (false, 0.0) + end + + # At this point we have a response with status >= 400 + + # Handle GitHub rate limiting (403, 429) with special logic + if status in (403, 429) + # Get all rate limit headers + limit = HTTP.header(resp, "x-ratelimit-limit", "") + remaining = HTTP.header(resp, "x-ratelimit-remaining", "") + used = HTTP.header(resp, "x-ratelimit-used", "") + reset_time = HTTP.header(resp, "x-ratelimit-reset", "") + resource = HTTP.header(resp, "x-ratelimit-resource", "") + retry_after = HTTP.header(resp, "retry-after", "") + + # Check response body for secondary rate limit indicators + # Note: `String` takes ownership / removes the body, so we make a copy + body = String(copy(resp.body)) + is_secondary_rate_limit = occursin("secondary rate limit", lowercase(body)) || !isempty(retry_after) + if is_secondary_rate_limit + # Secondary rate limit handling + # If retry-after header is present, respect it + delay_seconds = safe_tryparse(Float64, retry_after) + if delay_seconds !== nothing + delay_seconds = parse(Float64, retry_after) + verbose && @info "GitHub API secondary rate limit hit, retrying in $(round(delay_seconds, digits=1))s" method=method status=status limit=limit remaining=remaining used=used reset=reset_time resource=resource retry_after=retry_after + return (true, delay_seconds) + end + + # If x-ratelimit-remaining is 0, wait until reset time + reset_timestamp = safe_tryparse(Float64, reset_time) + if remaining == "0" && reset_timestamp !== nothing + current_time = time() + if reset_timestamp > current_time + delay_seconds = reset_timestamp - current_time + 1.0 + verbose && @info "GitHub API secondary rate limit hit, retrying in $(round(delay_seconds, digits=1))s" method=method status=status limit=limit remaining=remaining used=used reset=reset_time resource=resource retry_after=retry_after + return (true, delay_seconds) + end + end + + # Otherwise, wait at least 1 minute, then use exponential backoff + delay_seconds = max(60.0, exponential_delay) + verbose && @info "GitHub API secondary rate limit hit, retrying in $(round(delay_seconds, digits=1))s" method=method status=status limit=limit remaining=remaining used=used reset=reset_time resource=resource retry_after=retry_after + return (true, delay_seconds) + else + # Primary rate limit handling + # If x-ratelimit-remaining is 0, wait until reset time + reset_timestamp = safe_tryparse(Float64, reset_time) + if remaining == "0" && reset_timestamp !== nothing + current_time = time() + if reset_timestamp > current_time + delay_seconds = reset_timestamp - current_time + 1.0 + verbose && @info "GitHub API primary rate limit hit, retrying in $(round(delay_seconds, digits=1))s" method=method status=status limit=limit remaining=remaining used=used reset=reset_time resource=resource + return (true, delay_seconds) + end + end + + # For other primary rate limit cases, use exponential backoff + verbose && @info "GitHub API primary rate limit hit, retrying in $(round(exponential_delay, digits=1))s" method=method status=status limit=limit remaining=remaining used=used reset=reset_time resource=resource + return (true, exponential_delay) + end + end + + # For other HTTP errors, check if they're retryable according to HTTP.jl + if HTTP.RetryRequest.retryable(status) && HTTP.RetryRequest.isidempotent(method) + verbose && @info "GitHub API HTTP error, retrying in $(round(exponential_delay, digits=1))s" method=method status=status + return (true, exponential_delay) + end + + # For client errors (400, 401, 404, 422), don't retry + return (false, 0.0) +end + +""" + with_retries(f; method::AbstractString="GET", max_retries::Int=5, verbose::Bool=true, sleep_fn=sleep) -> Any + +Generic retry wrapper that executes function `f()` with GitHub-specific retry logic. + +# Arguments +- `f`: Function to execute (should return HTTP.Response or throw exception) +- `method`: HTTP method for retry decision logic (default: "GET") +- `max_retries`: Maximum number of retry attempts (default: 5) +- `verbose`: Whether to log retry decisions (default: true) +- `sleep_fn`: Function to call for sleeping between retries (default: sleep). For testing, can be replaced with a custom function. + +# Returns +Returns the result of `f()` if successful, or re-throws the final exception if all retries fail. + +# Example +```julia +result = with_retries(method="GET", verbose=false) do + HTTP.get("https://api.github.com/user", headers) +end +``` +""" +function with_retries(f; method::AbstractString="GET", max_retries::Int=5, verbose::Bool=true, sleep_fn=sleep) + # Create ExponentialBackOff iterator + backoff = Base.ExponentialBackOff(n = max_retries+1) + + # Try the function, with retries on failure + for (attempt, exponential_delay) in enumerate(backoff) + last_try = attempt > max_retries + local r, ex + try + r = f() + ex = nothing + if last_try + return r + end + catch e + r = nothing + ex = e + if last_try + rethrow() + end + end + + # Check if we should retry based on this attempt + should_retry, sleep_seconds = github_retry_decision(method, r, ex, exponential_delay; verbose=verbose) + + if !should_retry + # Don't retry - return response or throw exception + if ex !== nothing + throw(ex) + else + return r + end + end + + # Sleep before the next attempt (at the end of the loop) + if sleep_seconds > 0 + sleep_fn(sleep_seconds) + end + end +end + +function github_request(api::GitHubAPI, request_method::String, endpoint; auth = AnonymousAuth(), handle_error = true, - headers = Dict(), params = Dict(), allowredirects = true) + headers = Dict(), params = Dict(), allowredirects = true, + max_retries = 5) authenticate_headers!(headers, auth) params = github2json(params) api_endpoint = api_uri(api, endpoint) _headers = convert(Dict{String, String}, headers) !haskey(_headers, "User-Agent") && (_headers["User-Agent"] = "GitHub-jl") - if request_method == HTTP.get - r = request_method(URIs.URI(api_endpoint, query = params), _headers, redirect = allowredirects, status_exception = false, idle_timeout=20) - else - r = request_method(string(api_endpoint), _headers, JSON.json(params), redirect = allowredirects, status_exception = false, idle_timeout=20) + + # Use with_retries helper to handle the retry logic + r = with_retries(method = request_method, max_retries = max_retries) do + if request_method == "GET" + return HTTP.request(request_method, URIs.URI(api_endpoint, query = params), _headers; + redirect = allowredirects, status_exception = false, + idle_timeout = 20, retry = false) + else + return HTTP.request(request_method, string(api_endpoint), _headers, JSON.json(params); + redirect = allowredirects, status_exception = false, + idle_timeout = 20, retry = false) + end end + handle_error && handle_response_error(r) return r end -gh_get(api::GitHubAPI, endpoint = ""; options...) = github_request(api, HTTP.get, endpoint; options...) -gh_post(api::GitHubAPI, endpoint = ""; options...) = github_request(api, HTTP.post, endpoint; options...) -gh_put(api::GitHubAPI, endpoint = ""; options...) = github_request(api, HTTP.put, endpoint; options...) -gh_delete(api::GitHubAPI, endpoint = ""; options...) = github_request(api, HTTP.delete, endpoint; options...) -gh_patch(api::GitHubAPI, endpoint = ""; options...) = github_request(api, HTTP.patch, endpoint; options...) +gh_get(api::GitHubAPI, endpoint = ""; options...) = github_request(api, "GET", endpoint; options...) +gh_post(api::GitHubAPI, endpoint = ""; options...) = github_request(api, "POST", endpoint; options...) +gh_put(api::GitHubAPI, endpoint = ""; options...) = github_request(api, "PUT", endpoint; options...) +gh_delete(api::GitHubAPI, endpoint = ""; options...) = github_request(api, "DELETE", endpoint; options...) +gh_patch(api::GitHubAPI, endpoint = ""; options...) = github_request(api, "PATCH", endpoint; options...) gh_get_json(api::GitHubAPI, endpoint = ""; options...) = JSON.parse(HTTP.payload(gh_get(api, endpoint; options...), String)) gh_post_json(api::GitHubAPI, endpoint = ""; options...) = JSON.parse(HTTP.payload(gh_post(api, endpoint; options...), String)) @@ -113,15 +315,24 @@ end extract_page_url(link) = match(r"<.*?>", link).match[2:end-1] function github_paged_get(api, endpoint; page_limit = Inf, start_page = "", handle_error = true, - auth = AnonymousAuth(), headers = Dict(), params = Dict(), options...) + auth = AnonymousAuth(), headers = Dict(), params = Dict(), max_retries = 5, options...) authenticate_headers!(headers, auth) _headers = convert(Dict{String, String}, headers) !haskey(_headers, "User-Agent") && (_headers["User-Agent"] = "GitHub-jl") + + # Helper function to make a single request with retries + function make_request_with_retries(url, headers) + # Use with_retries helper to handle the retry logic + return with_retries(method = "GET", max_retries = max_retries) do + HTTP.request("GET", url, headers; status_exception = false, retry = false) + end + end + if isempty(start_page) - r = gh_get(api, endpoint; handle_error = handle_error, headers = _headers, params = params, auth=auth, options...) + r = gh_get(api, endpoint; handle_error = handle_error, headers = _headers, params = params, auth=auth, max_retries=max_retries, options...) else @assert isempty(params) "`start_page` kwarg is incompatible with `params` kwarg" - r = HTTP.get(start_page, headers = _headers) + r = make_request_with_retries(start_page, _headers) end results = HTTP.Response[r] page_data = Dict{String, String}() @@ -131,7 +342,7 @@ function github_paged_get(api, endpoint; page_limit = Inf, start_page = "", hand links = get_page_links(r) next_index = find_page_link(links, "next") next_index == 0 && break - r = HTTP.get(extract_page_url(links[next_index]), headers = _headers) + r = make_request_with_retries(extract_page_url(links[next_index]), _headers) handle_error && handle_response_error(r) push!(results, r) page_count += 1 @@ -184,7 +395,7 @@ function handle_response_error(r::HTTP.Response) errors = get(data, "errors", "") catch end - error("Error found in GitHub reponse:\n", + error("Error found in GitHub response:\n", "\tStatus Code: $(r.status)\n", ((isempty(message) && isempty(errors)) ? ("\tBody: $body",) : @@ -210,4 +421,4 @@ function check_disallowed_name_pattern(str::AbstractString) end return str -end \ No newline at end of file +end diff --git a/test/retries.jl b/test/retries.jl new file mode 100644 index 0000000..5afd512 --- /dev/null +++ b/test/retries.jl @@ -0,0 +1,419 @@ +using Test +using HTTP +using GitHub + +@testset "github_retry_decision" begin + + @testset "HTTP.jl recoverable exceptions" begin + # Test with a potentially recoverable exception (let HTTP.jl decide) + # We'll just test that our function handles exceptions without crashing + network_ex = Base.IOError("connection reset", 104) + should_retry, sleep_seconds = GitHub.github_retry_decision("GET", nothing, network_ex, 2.0; verbose=false) + # The actual retry decision depends on HTTP.jl's isrecoverable and isidempotent functions + @test typeof(should_retry) == Bool + @test sleep_seconds >= 0.0 + + # Test with non-recoverable exception + non_recoverable_ex = ArgumentError("invalid argument") + should_retry, sleep_seconds = GitHub.github_retry_decision("GET", nothing, non_recoverable_ex, 2.0; verbose=false) + @test should_retry == false + @test sleep_seconds == 0.0 + end + + @testset "No response and no exception" begin + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",nothing, nothing, 2.0; verbose=false) + @test should_retry == false + @test sleep_seconds == 0.0 + end + + @testset "Successful responses" begin + for status in [200, 201, 204] + resp = HTTP.Response(status) + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 2.0; verbose=false) + @test should_retry == false + @test sleep_seconds == 0.0 + end + end + + @testset "Primary rate limit - x-ratelimit-remaining = 0" begin + + # Test with future reset time - use fixed timestamp to avoid race conditions + future_time = "1900000000" # Fixed timestamp in the future (year 2030) + resp = HTTP.Response(403, [ + "x-ratelimit-remaining" => "0", + "x-ratelimit-reset" => future_time + ]) + + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 2.0; verbose=false) + @test should_retry == true + @test sleep_seconds > 100000 # Should be a large delay since reset time is far in future + + # Test with past reset time (should use exponential backoff) + past_time = "1000000000" # Fixed timestamp in the past (year 2001) + resp2 = HTTP.Response(403, [ + "x-ratelimit-remaining" => "0", + "x-ratelimit-reset" => past_time + ]) + + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp2, nothing, 5.0; verbose=false) + @test should_retry == true + @test sleep_seconds == 5.0 # Should use the exponential delay + end + + @testset "Secondary rate limit - retry-after header" begin + + # Test secondary rate limit with retry-after + body = """{"message": "You have been rate limited due to a secondary rate limit", "documentation_url": "..."}""" + resp = HTTP.Response(429, ["retry-after" => "30"]; body = Vector{UInt8}(body)) + + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 2.0; verbose=false) + @test should_retry == true + @test sleep_seconds == 30.0 # Should use retry-after value + + # Test with just retry-after header (no body message) + resp2 = HTTP.Response(429, ["retry-after" => "15"]) + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp2, nothing, 2.0; verbose=false) + @test should_retry == true + @test sleep_seconds == 15.0 + end + + @testset "Secondary rate limit - message in body" begin + + # Test secondary rate limit detected from body message + body = """{"message": "You have exceeded a secondary rate limit. Please wait one minute before trying again."}""" + resp = HTTP.Response(429; body = Vector{UInt8}(body)) + + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 2.0; verbose=false) + @test should_retry == true + @test sleep_seconds == 60.0 # Should wait at least 1 minute + + # Test with exponential delay greater than 60 seconds + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 120.0; verbose=false) + @test should_retry == true + @test sleep_seconds == 120.0 # Should use the larger exponential delay + end + + @testset "Secondary rate limit - x-ratelimit-remaining = 0" begin + + # Test secondary rate limit with reset time - use fixed timestamp to avoid race conditions + future_time = "1900000000" # Fixed timestamp in the future (year 2030) + body = """{"message": "secondary rate limit exceeded"}""" + resp = HTTP.Response(403, [ + "x-ratelimit-remaining" => "0", + "x-ratelimit-reset" => future_time + ]; body = Vector{UInt8}(body)) + + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 5.0; verbose=false) + @test should_retry == true + @test sleep_seconds > 100000 # Should be a large delay since reset time is far in future + end + + @testset "Primary rate limit - exponential backoff" begin + + # Primary rate limit without specific headers + resp = HTTP.Response(429, []) + + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 4.0; verbose=false) + @test should_retry == true + @test sleep_seconds == 4.0 # Should use exponential delay + + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 8.0; verbose=false) + @test should_retry == true + @test sleep_seconds == 8.0 + end + + @testset "Other HTTP errors" begin + + for status in [408, 409, 500, 502, 503, 504, 599] + resp = HTTP.Response(status, []) + + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 3.0; verbose=false) + @test should_retry == true + @test sleep_seconds == 3.0 # Should use exponential delay + end + end + + @testset "Non-retryable client errors" begin + + for status in [400, 401, 404, 422] + resp = HTTP.Response(status, []) + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 1.0; verbose=false) + @test should_retry == false + @test sleep_seconds == 0.0 + end + end + + @testset "Invalid header values" begin + + # Test with invalid retry-after header (should fall back to secondary rate limit minimum) + resp1 = HTTP.Response(429, ["retry-after" => "invalid"]) + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp1, nothing, 2.0; verbose=false) + @test should_retry == true + @test sleep_seconds == 60.0 # Falls back to secondary rate limit minimum (1 minute) + + # Test with invalid reset time (should fall back to exponential backoff) + resp2 = HTTP.Response(403, [ + "x-ratelimit-remaining" => "0", + "x-ratelimit-reset" => "invalid" + ]) + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp2, nothing, 3.0; verbose=false) + @test should_retry == true + @test sleep_seconds == 3.0 # Falls back to exponential backoff + end + + @testset "Rate limit header precedence" begin + + # retry-after should take precedence over x-ratelimit-reset + future_time = "1900000000" # Fixed timestamp (doesn't matter since retry-after takes precedence) + resp = HTTP.Response(429, [ + "retry-after" => "5", + "x-ratelimit-remaining" => "0", + "x-ratelimit-reset" => future_time + ]) + + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 10.0; verbose=false) + @test should_retry == true + @test sleep_seconds == 5.0 # Should use retry-after, not reset time + end + + @testset "Rate limit remaining non-zero" begin + + # Should use exponential backoff when x-ratelimit-remaining is not "0" + future_time = "1900000000" # Fixed timestamp (doesn't matter since remaining != "0") + resp = HTTP.Response(403, [ + "x-ratelimit-remaining" => "5", + "x-ratelimit-reset" => future_time + ]) + + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 3.0; verbose=false) + @test should_retry == true + @test sleep_seconds == 3.0 # Should use exponential backoff, not reset time + end + + @testset "Exception with successful response" begin + + # If we have both an exception and a response, the response should take precedence + resp = HTTP.Response(200) + ex = Base.IOError("some error", 0) + + should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, ex, 2.0; verbose=false) + @test should_retry == false # Success response should not retry + @test sleep_seconds == 0.0 + end + + @testset "Request method considerations" begin + # Test with different HTTP methods to ensure retryable logic works + + # Network exception with different methods + network_ex = Base.IOError("connection refused", 111) + + # Test that both work without crashing (actual retry depends on HTTP.jl internals) + should_retry, sleep_seconds = GitHub.github_retry_decision("GET", nothing, network_ex, 1.0; verbose=false) + @test typeof(should_retry) == Bool + @test sleep_seconds >= 0.0 + + # POST behavior depends on HTTP.jl's isidempotent() function (non-idempotent methods typically don't retry) + should_retry, sleep_seconds = GitHub.github_retry_decision("POST", nothing, network_ex, 1.0; verbose=false) + @test typeof(should_retry) == Bool + @test sleep_seconds >= 0.0 + end +end + + +@testset "with_retries function tests" begin + @testset "Success after retries" begin + call_count = Ref(0) + sleep_calls = Float64[] + + # Custom sleep function that records sleep durations + function test_sleep(seconds) + push!(sleep_calls, seconds) + end + + result = GitHub.with_retries(method="GET", max_retries=2, verbose=false, sleep_fn=test_sleep) do + call_count[] += 1 + if call_count[] < 3 + # Return a rate limit response for first 2 attempts + return HTTP.Response(429, ["retry-after" => "1"]) + else + # Success on 3rd attempt + return HTTP.Response(200) + end + end + + @test result.status == 200 + @test call_count[] ==3 + @test length(sleep_calls) == 2 # Should have slept twice + @test all(s -> s >= 1.0, sleep_calls) # Should respect retry-after + end + + @testset "Exception handling" begin + call_count = Ref(0) + sleep_calls = Float64[] + + function test_sleep(seconds) + push!(sleep_calls, seconds) + end + + # Test with recoverable exception (for GET method) + result = GitHub.with_retries(method="GET", max_retries=2, verbose=false, sleep_fn=test_sleep) do + call_count[] += 1 + if call_count[] < 3 + throw(Base.IOError("connection refused", 111)) + else + return HTTP.Response(200) + end + end + + @test result.status == 200 + @test call_count[] ==3 + @test length(sleep_calls) == 2 + end + + @testset "Non-retryable exceptions" begin + # Test that ArgumentError is not retried + @test_throws ArgumentError GitHub.with_retries(method="GET", verbose=false) do + throw(ArgumentError("invalid argument")) + end + end + + @testset "Max retries exhausted" begin + call_count = Ref(0) + sleep_calls = Float64[] + + function test_sleep(seconds) + push!(sleep_calls, seconds) + end + + # Test exhausting retries with rate limit responses + result = GitHub.with_retries(method="GET", max_retries=2, verbose=false, sleep_fn=test_sleep) do + call_count[] += 1 + return HTTP.Response(429, ["retry-after" => "0.1"]) # Always return rate limit + end + + @test result.status == 429 # Should return the final failed response + @test call_count[] ==3 # Initial + 2 retries + @test length(sleep_calls) == 2 + end + + @testset "Max retries exhausted with exception" begin + call_count = Ref(0) + + # Test exhausting retries with exceptions + @test_throws Base.IOError GitHub.with_retries(method="GET", max_retries=1, verbose=false, sleep_fn=x->nothing) do + call_count[] += 1 + throw(Base.IOError("persistent error", 104)) + end + + @test call_count[] ==2 # Initial + 1 retry + end + + @testset "Non-idempotent methods" begin + call_count = Ref(0) + + # POST requests should not retry on exceptions (non-idempotent) + @test_throws Base.IOError GitHub.with_retries(method="POST", verbose=false) do + call_count[] += 1 + throw(Base.IOError("connection refused", 111)) + end + + @test call_count[] ==1 # Should not retry + end + + @testset "GitHub rate limit handling" begin + call_count = Ref(0) + sleep_calls = Float64[] + + function test_sleep(seconds) + push!(sleep_calls, seconds) + end + + # Test primary rate limit with reset time + current_time = time() + reset_time = string(Int(round(current_time)) + 500000000) # 500000000 seconds from now + + result = GitHub.with_retries(method="GET", max_retries=1, verbose=false, sleep_fn=test_sleep) do + call_count[] += 1 + if call_count[] == 1 + return HTTP.Response(403, [ + "x-ratelimit-remaining" => "0", + "x-ratelimit-reset" => reset_time + ]) + else + return HTTP.Response(200) + end + end + + @test result.status == 200 + @test call_count[] ==2 + @test length(sleep_calls) == 1 + @test sleep_calls[1] >= 5.0 # Should wait at least until reset time + end + + @testset "Secondary rate limit with retry-after" begin + call_count = Ref(0) + sleep_calls = Float64[] + + function test_sleep(seconds) + push!(sleep_calls, seconds) + end + + body = """{"message": "You have exceeded a secondary rate limit."}""" + + result = GitHub.with_retries(method="GET", max_retries=1, verbose=false, sleep_fn=test_sleep) do + call_count[] += 1 + if call_count[] == 1 + return HTTP.Response(429, ["retry-after" => "3"]; body = Vector{UInt8}(body)) + else + return HTTP.Response(200) + end + end + + @test result.status == 200 + @test call_count[] == 2 + @test length(sleep_calls) == 1 + @test sleep_calls[1] == 3.0 # Should respect retry-after exactly + end + + @testset "Non-retryable HTTP errors" begin + call_count = Ref(0) + + # 404 should not be retried + result = GitHub.with_retries(method="GET", verbose=false) do + call_count[] += 1 + return HTTP.Response(404) + end + + @test result.status == 404 + @test call_count[] ==1 # Should not retry + end + + @testset "Zero max_retries" begin + call_count = Ref(0) + + # With max_retries=0, should only try once + result = GitHub.with_retries(method="GET", max_retries=0, verbose=false) do + call_count[] += 1 + return HTTP.Response(429) # Rate limit response + end + + @test result.status == 429 + @test call_count[] ==1 # Should not retry at all + end + + @testset "Sleep function not called on success" begin + sleep_called = Ref(false) + + function test_sleep(seconds) + sleep_called[] = true + end + + result = GitHub.with_retries(method="GET", verbose=false, sleep_fn=test_sleep) do + return HTTP.Response(200) + end + + @test result.status == 200 + @test !sleep_called[] + end + +end diff --git a/test/runtests.jl b/test/runtests.jl index 9e45211..f1c1814 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -7,6 +7,7 @@ include("ghtype_tests.jl") include("event_tests.jl") include("read_only_api_tests.jl") include("auth_tests.jl") +include("retries.jl") @testset "SSH keygen" begin pubkey, privkey = GitHub.genkeys(keycomment="GitHub.jl") From 195dd74f38efe614a983b06f60a9d106afab3cb0 Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Sun, 28 Sep 2025 00:47:11 +0200 Subject: [PATCH 02/15] add note --- src/utils/requests.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/utils/requests.jl b/src/utils/requests.jl index 6c3923a..8aeb83c 100644 --- a/src/utils/requests.jl +++ b/src/utils/requests.jl @@ -94,6 +94,8 @@ A tuple `(should_retry, sleep_seconds)` where: - If `retry-after` present: use that delay - If `x-ratelimit-remaining: 0`: wait until reset time - Otherwise: wait at least 1 minute, then use exponential backoff + +This follows the [documentation from GitHub](https://docs.github.com/en/rest/using-the-rest-api/rate-limits-for-the-rest-api?apiVersion=2022-11-28#exceeding-the-rate-limit) as of 2025. """ function github_retry_decision(method::String, resp::Union{HTTP.Response, Nothing}, ex::Union{Exception, Nothing}, exponential_delay::Float64; verbose::Bool=true) # If we have a response, process it first (takes precedence over exceptions) From c83ee210fa60667e19bd0cd97804aecb0b8a0d1b Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Sun, 28 Sep 2025 00:52:35 +0200 Subject: [PATCH 03/15] rm useless comments --- src/utils/requests.jl | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/utils/requests.jl b/src/utils/requests.jl index 8aeb83c..00480da 100644 --- a/src/utils/requests.jl +++ b/src/utils/requests.jl @@ -185,7 +185,6 @@ function github_retry_decision(method::String, resp::Union{HTTP.Response, Nothin return (true, exponential_delay) end - # For client errors (400, 401, 404, 422), don't retry return (false, 0.0) end @@ -212,10 +211,8 @@ end ``` """ function with_retries(f; method::AbstractString="GET", max_retries::Int=5, verbose::Bool=true, sleep_fn=sleep) - # Create ExponentialBackOff iterator backoff = Base.ExponentialBackOff(n = max_retries+1) - # Try the function, with retries on failure for (attempt, exponential_delay) in enumerate(backoff) last_try = attempt > max_retries local r, ex @@ -237,7 +234,6 @@ function with_retries(f; method::AbstractString="GET", max_retries::Int=5, verbo should_retry, sleep_seconds = github_retry_decision(method, r, ex, exponential_delay; verbose=verbose) if !should_retry - # Don't retry - return response or throw exception if ex !== nothing throw(ex) else @@ -245,7 +241,6 @@ function with_retries(f; method::AbstractString="GET", max_retries::Int=5, verbo end end - # Sleep before the next attempt (at the end of the loop) if sleep_seconds > 0 sleep_fn(sleep_seconds) end @@ -262,7 +257,6 @@ function github_request(api::GitHubAPI, request_method::String, endpoint; _headers = convert(Dict{String, String}, headers) !haskey(_headers, "User-Agent") && (_headers["User-Agent"] = "GitHub-jl") - # Use with_retries helper to handle the retry logic r = with_retries(method = request_method, max_retries = max_retries) do if request_method == "GET" return HTTP.request(request_method, URIs.URI(api_endpoint, query = params), _headers; @@ -322,9 +316,8 @@ function github_paged_get(api, endpoint; page_limit = Inf, start_page = "", hand _headers = convert(Dict{String, String}, headers) !haskey(_headers, "User-Agent") && (_headers["User-Agent"] = "GitHub-jl") - # Helper function to make a single request with retries + # Helper function to make a get request with retries function make_request_with_retries(url, headers) - # Use with_retries helper to handle the retry logic return with_retries(method = "GET", max_retries = max_retries) do HTTP.request("GET", url, headers; status_exception = false, retry = false) end From 3edd3e8b181ae60d51c20dddfc5e860bcfd76021 Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Sun, 28 Sep 2025 01:06:49 +0200 Subject: [PATCH 04/15] wip --- src/utils/requests.jl | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/utils/requests.jl b/src/utils/requests.jl index 00480da..a487380 100644 --- a/src/utils/requests.jl +++ b/src/utils/requests.jl @@ -14,6 +14,9 @@ end const DEFAULT_API = GitHubWebAPI(URIs.URI("https://api.github.com")) +const WRITE_METHOD_LOCK = ReentrantLock() +const LAST_WRITE_METHOD_TIMESTAMP = Ref{Float64}(0.0) + using Base.Meta """ @@ -212,11 +215,28 @@ end """ function with_retries(f; method::AbstractString="GET", max_retries::Int=5, verbose::Bool=true, sleep_fn=sleep) backoff = Base.ExponentialBackOff(n = max_retries+1) + method_upper = uppercase(method) + requires_write_throttle = method_upper in ("POST", "PATCH", "PUT", "DELETE") for (attempt, exponential_delay) in enumerate(backoff) last_try = attempt > max_retries local r, ex try + if requires_write_throttle + while true + lock(WRITE_METHOD_LOCK) + last_ts = LAST_WRITE_METHOD_TIMESTAMP[] + now = time() + wait_time = last_ts == 0.0 ? 0.0 : (last_ts + 1.0 - now) + if wait_time <= 0 + LAST_WRITE_METHOD_TIMESTAMP[] = now + unlock(WRITE_METHOD_LOCK) + break + end + unlock(WRITE_METHOD_LOCK) + sleep_fn(wait_time) + end + end r = f() ex = nothing if last_try From 5d59b004c602be451f6a95994403db29af6186fc Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Sun, 28 Sep 2025 01:18:44 +0200 Subject: [PATCH 05/15] fix logic so we don't misidentify access-forbidden 403s as rate-limits --- src/utils/requests.jl | 108 +++++++++++++++++++----------------------- test/retries.jl | 2 +- 2 files changed, 51 insertions(+), 59 deletions(-) diff --git a/src/utils/requests.jl b/src/utils/requests.jl index 00480da..46032d9 100644 --- a/src/utils/requests.jl +++ b/src/utils/requests.jl @@ -120,72 +120,64 @@ function github_retry_decision(method::String, resp::Union{HTTP.Response, Nothin end # At this point we have a response with status >= 400 + # First let us see if we want to retry it. - # Handle GitHub rate limiting (403, 429) with special logic - if status in (403, 429) - # Get all rate limit headers - limit = HTTP.header(resp, "x-ratelimit-limit", "") - remaining = HTTP.header(resp, "x-ratelimit-remaining", "") - used = HTTP.header(resp, "x-ratelimit-used", "") - reset_time = HTTP.header(resp, "x-ratelimit-reset", "") - resource = HTTP.header(resp, "x-ratelimit-resource", "") - retry_after = HTTP.header(resp, "retry-after", "") - - # Check response body for secondary rate limit indicators - # Note: `String` takes ownership / removes the body, so we make a copy - body = String(copy(resp.body)) - is_secondary_rate_limit = occursin("secondary rate limit", lowercase(body)) || !isempty(retry_after) - if is_secondary_rate_limit - # Secondary rate limit handling - # If retry-after header is present, respect it - delay_seconds = safe_tryparse(Float64, retry_after) - if delay_seconds !== nothing - delay_seconds = parse(Float64, retry_after) - verbose && @info "GitHub API secondary rate limit hit, retrying in $(round(delay_seconds, digits=1))s" method=method status=status limit=limit remaining=remaining used=used reset=reset_time resource=resource retry_after=retry_after - return (true, delay_seconds) - end + # Note: `String` takes ownership / removes the body, so we make a copy + body = String(copy(resp.body)) + is_primary_rate_limit = occursin("primary rate limit", lowercase(body)) && status in (403, 429) + is_secondary_rate_limit = occursin("secondary rate limit", lowercase(body)) && status in (403, 429) - # If x-ratelimit-remaining is 0, wait until reset time - reset_timestamp = safe_tryparse(Float64, reset_time) - if remaining == "0" && reset_timestamp !== nothing - current_time = time() - if reset_timestamp > current_time - delay_seconds = reset_timestamp - current_time + 1.0 - verbose && @info "GitHub API secondary rate limit hit, retrying in $(round(delay_seconds, digits=1))s" method=method status=status limit=limit remaining=remaining used=used reset=reset_time resource=resource retry_after=retry_after - return (true, delay_seconds) - end - end + do_retry = HTTP.RetryRequest.isidempotent(method) && (is_primary_rate_limit || is_secondary_rate_limit || HTTP.RetryRequest.retryable(status)) - # Otherwise, wait at least 1 minute, then use exponential backoff - delay_seconds = max(60.0, exponential_delay) - verbose && @info "GitHub API secondary rate limit hit, retrying in $(round(delay_seconds, digits=1))s" method=method status=status limit=limit remaining=remaining used=used reset=reset_time resource=resource retry_after=retry_after - return (true, delay_seconds) - else - # Primary rate limit handling - # If x-ratelimit-remaining is 0, wait until reset time - reset_timestamp = safe_tryparse(Float64, reset_time) - if remaining == "0" && reset_timestamp !== nothing - current_time = time() - if reset_timestamp > current_time - delay_seconds = reset_timestamp - current_time + 1.0 - verbose && @info "GitHub API primary rate limit hit, retrying in $(round(delay_seconds, digits=1))s" method=method status=status limit=limit remaining=remaining used=used reset=reset_time resource=resource - return (true, delay_seconds) - end - end + if !do_retry + return (false, 0.0) + end - # For other primary rate limit cases, use exponential backoff - verbose && @info "GitHub API primary rate limit hit, retrying in $(round(exponential_delay, digits=1))s" method=method status=status limit=limit remaining=remaining used=used reset=reset_time resource=resource - return (true, exponential_delay) - end + # Now we know we want to retry. We need to decide how long to wait. + + # Get all rate limit headers + limit = HTTP.header(resp, "x-ratelimit-limit", "") + remaining = HTTP.header(resp, "x-ratelimit-remaining", "") + used = HTTP.header(resp, "x-ratelimit-used", "") + reset_time = HTTP.header(resp, "x-ratelimit-reset", "") + resource = HTTP.header(resp, "x-ratelimit-resource", "") + retry_after = HTTP.header(resp, "retry-after", "") + + msg = if is_primary_rate_limit + "GitHub API primary rate limit reached" + elseif is_secondary_rate_limit + "GitHub API secondary rate limit reached" + else + "GitHub API returned $status" + end + + # If retry-after header is present, respect it + delay_seconds = safe_tryparse(Float64, retry_after) + if delay_seconds !== nothing + delay_seconds = parse(Float64, retry_after) + verbose && @info "$msg, retrying in $(round(delay_seconds, digits=1))s" method=method status=status limit=limit remaining=remaining used=used reset=reset_time resource=resource retry_after=retry_after + return (true, delay_seconds) end - # For other HTTP errors, check if they're retryable according to HTTP.jl - if HTTP.RetryRequest.retryable(status) && HTTP.RetryRequest.isidempotent(method) - verbose && @info "GitHub API HTTP error, retrying in $(round(exponential_delay, digits=1))s" method=method status=status - return (true, exponential_delay) + # If x-ratelimit-remaining is 0, wait until reset time + reset_timestamp = safe_tryparse(Float64, reset_time) + if remaining == "0" && reset_timestamp !== nothing + current_time = time() + if reset_timestamp > current_time + delay_seconds = reset_timestamp - current_time + 1.0 + verbose && @info "$msg, retrying in $(round(delay_seconds, digits=1))s" method=method status=status limit=limit remaining=remaining used=used reset=reset_time resource=resource retry_after=retry_after + return (true, delay_seconds) + end end - return (false, 0.0) + # If secondary rate limit hit without headers to guide us, + # make sure we wait at least a minute + delay_seconds = is_secondary_rate_limit ? max(60.0, exponential_delay) : exponential_delay + + # Fall back to exponential backoff + verbose && @info "$msg, retrying in $(round(delay_seconds, digits=1))s" method=method status=status + + return (true, delay_seconds) end """ diff --git a/test/retries.jl b/test/retries.jl index 5afd512..bb7485f 100644 --- a/test/retries.jl +++ b/test/retries.jl @@ -146,7 +146,7 @@ using GitHub @testset "Invalid header values" begin # Test with invalid retry-after header (should fall back to secondary rate limit minimum) - resp1 = HTTP.Response(429, ["retry-after" => "invalid"]) + resp1 = HTTP.Response(429, ["retry-after" => "invalid"], Vector{UInt8}("secondary rate limit")) should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp1, nothing, 2.0; verbose=false) @test should_retry == true @test sleep_seconds == 60.0 # Falls back to secondary rate limit minimum (1 minute) From ed8d470a895102c0bfe1461ba14ef09a3ad8e6f3 Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Sun, 28 Sep 2025 01:30:29 +0200 Subject: [PATCH 06/15] pass through `verbose` and add docs --- README.md | 5 +++++ src/utils/requests.jl | 10 +++++----- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index a75cb1e..2db7476 100644 --- a/README.md +++ b/README.md @@ -69,6 +69,11 @@ You can inspect which fields are available for a type `G<:GitHubType` by calling GitHub.jl implements a bunch of methods that make REST requests to GitHub's API. The below sections list these methods (note that a return type of `Tuple{Vector{T}, Dict}` means the result is [paginated](#pagination)). +These methods all accept keyword arguments which control how the request is made to github: + +- `max_retries::Int=5`: how many retries to attempt in requesting the resources. Retries are only made for idempotent requests ("GET", "HEAD", "OPTIONS", "TRACE", "PUT", "DELETE") and delays respect GitHub [rate limit headers](https://docs.github.com/en/rest/using-the-rest-api/rate-limits-for-the-rest-api?apiVersion=2022-11-28#checking-the-status-of-your-rate-limit). +- `verbose::Bool=true`: whether or not to log retries as Info level logs + #### Users and Organizations | method | return type | documentation | diff --git a/src/utils/requests.jl b/src/utils/requests.jl index 46032d9..7abd022 100644 --- a/src/utils/requests.jl +++ b/src/utils/requests.jl @@ -242,14 +242,14 @@ end function github_request(api::GitHubAPI, request_method::String, endpoint; auth = AnonymousAuth(), handle_error = true, headers = Dict(), params = Dict(), allowredirects = true, - max_retries = 5) + max_retries = 5, verbose = true) authenticate_headers!(headers, auth) params = github2json(params) api_endpoint = api_uri(api, endpoint) _headers = convert(Dict{String, String}, headers) !haskey(_headers, "User-Agent") && (_headers["User-Agent"] = "GitHub-jl") - r = with_retries(method = request_method, max_retries = max_retries) do + r = with_retries(; method = request_method, max_retries, verbose) do if request_method == "GET" return HTTP.request(request_method, URIs.URI(api_endpoint, query = params), _headers; redirect = allowredirects, status_exception = false, @@ -303,20 +303,20 @@ end extract_page_url(link) = match(r"<.*?>", link).match[2:end-1] function github_paged_get(api, endpoint; page_limit = Inf, start_page = "", handle_error = true, - auth = AnonymousAuth(), headers = Dict(), params = Dict(), max_retries = 5, options...) + auth = AnonymousAuth(), headers = Dict(), params = Dict(), max_retries = 5, verbose = true, options...) authenticate_headers!(headers, auth) _headers = convert(Dict{String, String}, headers) !haskey(_headers, "User-Agent") && (_headers["User-Agent"] = "GitHub-jl") # Helper function to make a get request with retries function make_request_with_retries(url, headers) - return with_retries(method = "GET", max_retries = max_retries) do + return with_retries(; method = "GET", max_retries, verbose) do HTTP.request("GET", url, headers; status_exception = false, retry = false) end end if isempty(start_page) - r = gh_get(api, endpoint; handle_error = handle_error, headers = _headers, params = params, auth=auth, max_retries=max_retries, options...) + r = gh_get(api, endpoint; handle_error, headers = _headers, params, auth, max_retries, verbose, options...) else @assert isempty(params) "`start_page` kwarg is incompatible with `params` kwarg" r = make_request_with_retries(start_page, _headers) From 76bc5064924b900c0ead752ebe14b2f553f7b6d9 Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Sun, 28 Sep 2025 01:36:15 +0200 Subject: [PATCH 07/15] don't retry access errors --- src/utils/requests.jl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/utils/requests.jl b/src/utils/requests.jl index 7abd022..5c73539 100644 --- a/src/utils/requests.jl +++ b/src/utils/requests.jl @@ -127,7 +127,11 @@ function github_retry_decision(method::String, resp::Union{HTTP.Response, Nothin is_primary_rate_limit = occursin("primary rate limit", lowercase(body)) && status in (403, 429) is_secondary_rate_limit = occursin("secondary rate limit", lowercase(body)) && status in (403, 429) - do_retry = HTTP.RetryRequest.isidempotent(method) && (is_primary_rate_limit || is_secondary_rate_limit || HTTP.RetryRequest.retryable(status)) + # `other_retry` is `HTTP.RetryRequest.retryable(status)` minus 403, + # since if it's not a rate-limit, we don't want to retry 403s. + other_retry = status in (408, 409, 429, 500, 502, 503, 504, 599) + + do_retry = HTTP.RetryRequest.isidempotent(method) && (is_primary_rate_limit || is_secondary_rate_limit || other_retry) if !do_retry return (false, 0.0) From 1c8421e2b6dbbc8c66eea35899ba253384c06e47 Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Sun, 28 Sep 2025 01:48:40 +0200 Subject: [PATCH 08/15] wip --- README.md | 1 + src/utils/requests.jl | 52 +++++++++++++++++++++++-------------------- 2 files changed, 29 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index 2db7476..0c7ca76 100644 --- a/README.md +++ b/README.md @@ -73,6 +73,7 @@ These methods all accept keyword arguments which control how the request is made - `max_retries::Int=5`: how many retries to attempt in requesting the resources. Retries are only made for idempotent requests ("GET", "HEAD", "OPTIONS", "TRACE", "PUT", "DELETE") and delays respect GitHub [rate limit headers](https://docs.github.com/en/rest/using-the-rest-api/rate-limits-for-the-rest-api?apiVersion=2022-11-28#checking-the-status-of-your-rate-limit). - `verbose::Bool=true`: whether or not to log retries as Info level logs +- `respect_mutation_delay::Bool=true`: whether or not to ensure at least 1s has passed since the last mutation has been submitted to the GitHub API from GitHub.jl, following GitHub's [recommended practice](https://docs.github.com/en/rest/using-the-rest-api/best-practices-for-using-the-rest-api?apiVersion=2022-11-28#pause-between-mutative-requests). #### Users and Organizations diff --git a/src/utils/requests.jl b/src/utils/requests.jl index e926057..2a4ac41 100644 --- a/src/utils/requests.jl +++ b/src/utils/requests.jl @@ -14,8 +14,8 @@ end const DEFAULT_API = GitHubWebAPI(URIs.URI("https://api.github.com")) -const WRITE_METHOD_LOCK = ReentrantLock() -const LAST_WRITE_METHOD_TIMESTAMP = Ref{Float64}(0.0) +const MUTATION_LOCK = ReentrantLock() +const LAST_MUTATION_TIMESTAMP = Ref{Float64}(0.0) using Base.Meta @@ -187,6 +187,22 @@ function github_retry_decision(method::String, resp::Union{HTTP.Response, Nothin return (true, delay_seconds) end +function wait_for_mutation_delay(; sleep_fn=sleep) + while true + now = time() + # Checking & setting must be atomic to prevent races, so we use a lock + @lock MUTATION_LOCK begin + last_ts = LAST_MUTATION_TIMESTAMP[] + wait_time = last_ts == 0.0 ? 0.0 : (last_ts + 1.0 - now) + if wait_time <= 0 # good to go + LAST_MUTATION_TIMESTAMP[] = now + return nothing + end + end + sleep_fn(wait_time) + end +end + """ with_retries(f; method::AbstractString="GET", max_retries::Int=5, verbose::Bool=true, sleep_fn=sleep) -> Any @@ -209,30 +225,18 @@ result = with_retries(method="GET", verbose=false) do end ``` """ -function with_retries(f; method::AbstractString="GET", max_retries::Int=5, verbose::Bool=true, sleep_fn=sleep) +function with_retries(f; method::AbstractString="GET", max_retries::Int=5, verbose::Bool=true, sleep_fn=sleep, respect_mutation_delay=true) backoff = Base.ExponentialBackOff(n = max_retries+1) method_upper = uppercase(method) - requires_write_throttle = method_upper in ("POST", "PATCH", "PUT", "DELETE") + requires_mutation_throttle = respect_mutation_delay && method_upper in ("POST", "PATCH", "PUT", "DELETE") for (attempt, exponential_delay) in enumerate(backoff) last_try = attempt > max_retries + if requires_mutation_throttle + wait_for_mutation_delay(; sleep_fn) + end local r, ex try - if requires_write_throttle - while true - lock(WRITE_METHOD_LOCK) - last_ts = LAST_WRITE_METHOD_TIMESTAMP[] - now = time() - wait_time = last_ts == 0.0 ? 0.0 : (last_ts + 1.0 - now) - if wait_time <= 0 - LAST_WRITE_METHOD_TIMESTAMP[] = now - unlock(WRITE_METHOD_LOCK) - break - end - unlock(WRITE_METHOD_LOCK) - sleep_fn(wait_time) - end - end r = f() ex = nothing if last_try @@ -266,14 +270,14 @@ end function github_request(api::GitHubAPI, request_method::String, endpoint; auth = AnonymousAuth(), handle_error = true, headers = Dict(), params = Dict(), allowredirects = true, - max_retries = 5, verbose = true) + max_retries = 5, verbose = true, respect_mutation_delay = true) authenticate_headers!(headers, auth) params = github2json(params) api_endpoint = api_uri(api, endpoint) _headers = convert(Dict{String, String}, headers) !haskey(_headers, "User-Agent") && (_headers["User-Agent"] = "GitHub-jl") - r = with_retries(; method = request_method, max_retries, verbose) do + r = with_retries(; method = request_method, max_retries, verbose, respect_mutation_delay) do if request_method == "GET" return HTTP.request(request_method, URIs.URI(api_endpoint, query = params), _headers; redirect = allowredirects, status_exception = false, @@ -327,20 +331,20 @@ end extract_page_url(link) = match(r"<.*?>", link).match[2:end-1] function github_paged_get(api, endpoint; page_limit = Inf, start_page = "", handle_error = true, - auth = AnonymousAuth(), headers = Dict(), params = Dict(), max_retries = 5, verbose = true, options...) + auth = AnonymousAuth(), headers = Dict(), params = Dict(), max_retries = 5, verbose = true, respect_mutation_delay = true, options...) authenticate_headers!(headers, auth) _headers = convert(Dict{String, String}, headers) !haskey(_headers, "User-Agent") && (_headers["User-Agent"] = "GitHub-jl") # Helper function to make a get request with retries function make_request_with_retries(url, headers) - return with_retries(; method = "GET", max_retries, verbose) do + return with_retries(; method = "GET", max_retries, verbose, respect_mutation_delay) do HTTP.request("GET", url, headers; status_exception = false, retry = false) end end if isempty(start_page) - r = gh_get(api, endpoint; handle_error, headers = _headers, params, auth, max_retries, verbose, options...) + r = gh_get(api, endpoint; handle_error, headers = _headers, params, auth, max_retries, verbose, respect_mutation_delay, options...) else @assert isempty(params) "`start_page` kwarg is incompatible with `params` kwarg" r = make_request_with_retries(start_page, _headers) From 416b21e370b366a8a33d6d8cd751ff730a9ef02a Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Sun, 28 Sep 2025 18:50:57 +0200 Subject: [PATCH 09/15] add max_sleep_seconds, fix tests --- README.md | 1 + src/GitHub.jl | 2 +- src/utils/requests.jl | 26 +++++++++++------- test/retries.jl | 61 ++++++++++++++++++++++--------------------- 4 files changed, 50 insertions(+), 40 deletions(-) diff --git a/README.md b/README.md index 2db7476..d26de5b 100644 --- a/README.md +++ b/README.md @@ -73,6 +73,7 @@ These methods all accept keyword arguments which control how the request is made - `max_retries::Int=5`: how many retries to attempt in requesting the resources. Retries are only made for idempotent requests ("GET", "HEAD", "OPTIONS", "TRACE", "PUT", "DELETE") and delays respect GitHub [rate limit headers](https://docs.github.com/en/rest/using-the-rest-api/rate-limits-for-the-rest-api?apiVersion=2022-11-28#checking-the-status-of-your-rate-limit). - `verbose::Bool=true`: whether or not to log retries as Info level logs +- `max_sleep_seconds::Real=60*20`: if GitHub.jl intends to sleep for longer than `max_sleep_seconds` before retrying, e.g. due to rate limit headers from GitHub, throws an `RetryDelayException` instead. #### Users and Organizations diff --git a/src/GitHub.jl b/src/GitHub.jl index dd6e24c..d4bfcb3 100644 --- a/src/GitHub.jl +++ b/src/GitHub.jl @@ -44,7 +44,7 @@ export # auth.jl authenticate export # requests.jl - rate_limit + rate_limit, RetryDelayException ################################## # Owners (organizations + users) # diff --git a/src/utils/requests.jl b/src/utils/requests.jl index 5c73539..22ef536 100644 --- a/src/utils/requests.jl +++ b/src/utils/requests.jl @@ -184,8 +184,13 @@ function github_retry_decision(method::String, resp::Union{HTTP.Response, Nothin return (true, delay_seconds) end +struct RetryDelayException <: Exception + msg::String +end +Base.showerror(io::IO, e::RetryDelayException) = print(io, e.msg) + """ - with_retries(f; method::AbstractString="GET", max_retries::Int=5, verbose::Bool=true, sleep_fn=sleep) -> Any + with_retries(f; method::AbstractString="GET", max_retries::Int=5, verbose::Bool=true, sleep_fn=sleep, max_sleep_seconds::Real = 20*60) Generic retry wrapper that executes function `f()` with GitHub-specific retry logic. @@ -195,6 +200,7 @@ Generic retry wrapper that executes function `f()` with GitHub-specific retry lo - `max_retries`: Maximum number of retry attempts (default: 5) - `verbose`: Whether to log retry decisions (default: true) - `sleep_fn`: Function to call for sleeping between retries (default: sleep). For testing, can be replaced with a custom function. +- `max_sleep_seconds::Real`: maximum number of seconds to sleep when delaying before retrying. If the intended retry delay exceeds `max_sleep_seconds` an exception is thrown instead. This parameter defaults to 20*60 (20 minutes). # Returns Returns the result of `f()` if successful, or re-throws the final exception if all retries fail. @@ -206,7 +212,7 @@ result = with_retries(method="GET", verbose=false) do end ``` """ -function with_retries(f; method::AbstractString="GET", max_retries::Int=5, verbose::Bool=true, sleep_fn=sleep) +function with_retries(f; method::AbstractString="GET", max_retries::Int=5, verbose::Bool=true, sleep_fn=sleep, max_sleep_seconds::Real = 60*20) backoff = Base.ExponentialBackOff(n = max_retries+1) for (attempt, exponential_delay) in enumerate(backoff) @@ -227,7 +233,7 @@ function with_retries(f; method::AbstractString="GET", max_retries::Int=5, verbo end # Check if we should retry based on this attempt - should_retry, sleep_seconds = github_retry_decision(method, r, ex, exponential_delay; verbose=verbose) + should_retry, sleep_seconds = github_retry_decision(method, r, ex, exponential_delay; verbose) if !should_retry if ex !== nothing @@ -236,7 +242,9 @@ function with_retries(f; method::AbstractString="GET", max_retries::Int=5, verbo return r end end - + if sleep_seconds > max_sleep_seconds + throw(RetryDelayException("Retry delay $(sleep_seconds) exceeds configured maximum ($(max_sleep_seconds) seconds)")) + end if sleep_seconds > 0 sleep_fn(sleep_seconds) end @@ -246,14 +254,14 @@ end function github_request(api::GitHubAPI, request_method::String, endpoint; auth = AnonymousAuth(), handle_error = true, headers = Dict(), params = Dict(), allowredirects = true, - max_retries = 5, verbose = true) + max_retries = 5, verbose = true, max_sleep_seconds = 20*60) authenticate_headers!(headers, auth) params = github2json(params) api_endpoint = api_uri(api, endpoint) _headers = convert(Dict{String, String}, headers) !haskey(_headers, "User-Agent") && (_headers["User-Agent"] = "GitHub-jl") - r = with_retries(; method = request_method, max_retries, verbose) do + r = with_retries(; method = request_method, max_retries, verbose, max_sleep_seconds) do if request_method == "GET" return HTTP.request(request_method, URIs.URI(api_endpoint, query = params), _headers; redirect = allowredirects, status_exception = false, @@ -307,20 +315,20 @@ end extract_page_url(link) = match(r"<.*?>", link).match[2:end-1] function github_paged_get(api, endpoint; page_limit = Inf, start_page = "", handle_error = true, - auth = AnonymousAuth(), headers = Dict(), params = Dict(), max_retries = 5, verbose = true, options...) + auth = AnonymousAuth(), headers = Dict(), params = Dict(), max_retries = 5, verbose = true, max_sleep_seconds = 20*60, options...) authenticate_headers!(headers, auth) _headers = convert(Dict{String, String}, headers) !haskey(_headers, "User-Agent") && (_headers["User-Agent"] = "GitHub-jl") # Helper function to make a get request with retries function make_request_with_retries(url, headers) - return with_retries(; method = "GET", max_retries, verbose) do + return with_retries(; method = "GET", max_retries, verbose, max_sleep_seconds) do HTTP.request("GET", url, headers; status_exception = false, retry = false) end end if isempty(start_page) - r = gh_get(api, endpoint; handle_error, headers = _headers, params, auth, max_retries, verbose, options...) + r = gh_get(api, endpoint; handle_error, headers = _headers, params, auth, max_retries, verbose, max_sleep_seconds, options...) else @assert isempty(params) "`start_page` kwarg is incompatible with `params` kwarg" r = make_request_with_retries(start_page, _headers) diff --git a/test/retries.jl b/test/retries.jl index bb7485f..e6a3a43 100644 --- a/test/retries.jl +++ b/test/retries.jl @@ -2,6 +2,9 @@ using Test using HTTP using GitHub +primary_rate_limit_body = Vector{UInt8}("primary rate limit") +secondary_rate_limit_body = Vector{UInt8}("secondary rate limit") + @testset "github_retry_decision" begin @testset "HTTP.jl recoverable exceptions" begin @@ -42,9 +45,9 @@ using GitHub resp = HTTP.Response(403, [ "x-ratelimit-remaining" => "0", "x-ratelimit-reset" => future_time - ]) + ], primary_rate_limit_body) - should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 2.0; verbose=false) + should_retry, sleep_seconds = GitHub.github_retry_decision("GET", resp, nothing, 2.0; verbose=false) @test should_retry == true @test sleep_seconds > 100000 # Should be a large delay since reset time is far in future @@ -53,9 +56,9 @@ using GitHub resp2 = HTTP.Response(403, [ "x-ratelimit-remaining" => "0", "x-ratelimit-reset" => past_time - ]) + ], primary_rate_limit_body) - should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp2, nothing, 5.0; verbose=false) + should_retry, sleep_seconds = GitHub.github_retry_decision("GET", resp2, nothing, 5.0; verbose=false) @test should_retry == true @test sleep_seconds == 5.0 # Should use the exponential delay end @@ -63,8 +66,7 @@ using GitHub @testset "Secondary rate limit - retry-after header" begin # Test secondary rate limit with retry-after - body = """{"message": "You have been rate limited due to a secondary rate limit", "documentation_url": "..."}""" - resp = HTTP.Response(429, ["retry-after" => "30"]; body = Vector{UInt8}(body)) + resp = HTTP.Response(429, ["retry-after" => "30"]; body = secondary_rate_limit_body) should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 2.0; verbose=false) @test should_retry == true @@ -77,11 +79,8 @@ using GitHub @test sleep_seconds == 15.0 end - @testset "Secondary rate limit - message in body" begin - - # Test secondary rate limit detected from body message - body = """{"message": "You have exceeded a secondary rate limit. Please wait one minute before trying again."}""" - resp = HTTP.Response(429; body = Vector{UInt8}(body)) + @testset "Secondary rate limit - no headers" begin + resp = HTTP.Response(429; body = secondary_rate_limit_body) should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 2.0; verbose=false) @test should_retry == true @@ -97,20 +96,18 @@ using GitHub # Test secondary rate limit with reset time - use fixed timestamp to avoid race conditions future_time = "1900000000" # Fixed timestamp in the future (year 2030) - body = """{"message": "secondary rate limit exceeded"}""" resp = HTTP.Response(403, [ "x-ratelimit-remaining" => "0", "x-ratelimit-reset" => future_time - ]; body = Vector{UInt8}(body)) + ], secondary_rate_limit_body) should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 5.0; verbose=false) @test should_retry == true @test sleep_seconds > 100000 # Should be a large delay since reset time is far in future end - @testset "Primary rate limit - exponential backoff" begin - - # Primary rate limit without specific headers + @testset "429 - exponential backoff" begin + # 429 without specific headers or body resp = HTTP.Response(429, []) should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 4.0; verbose=false) @@ -123,7 +120,6 @@ using GitHub end @testset "Other HTTP errors" begin - for status in [408, 409, 500, 502, 503, 504, 599] resp = HTTP.Response(status, []) @@ -134,8 +130,7 @@ using GitHub end @testset "Non-retryable client errors" begin - - for status in [400, 401, 404, 422] + for status in [400, 401, 403, 404, 422] resp = HTTP.Response(status, []) should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 1.0; verbose=false) @test should_retry == false @@ -144,25 +139,23 @@ using GitHub end @testset "Invalid header values" begin - - # Test with invalid retry-after header (should fall back to secondary rate limit minimum) - resp1 = HTTP.Response(429, ["retry-after" => "invalid"], Vector{UInt8}("secondary rate limit")) + # Test with invalid retry-after header (should use secondary rate limit minimum) + resp1 = HTTP.Response(429, ["retry-after" => "invalid"], secondary_rate_limit_body) should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp1, nothing, 2.0; verbose=false) @test should_retry == true @test sleep_seconds == 60.0 # Falls back to secondary rate limit minimum (1 minute) - # Test with invalid reset time (should fall back to exponential backoff) + # Test with invalid reset time (should fall back to secondary min) resp2 = HTTP.Response(403, [ "x-ratelimit-remaining" => "0", "x-ratelimit-reset" => "invalid" - ]) + ], secondary_rate_limit_body) should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp2, nothing, 3.0; verbose=false) @test should_retry == true - @test sleep_seconds == 3.0 # Falls back to exponential backoff + @test sleep_seconds == 60.0 # minimum for secondary rate limit end @testset "Rate limit header precedence" begin - # retry-after should take precedence over x-ratelimit-reset future_time = "1900000000" # Fixed timestamp (doesn't matter since retry-after takes precedence) resp = HTTP.Response(429, [ @@ -183,7 +176,7 @@ using GitHub resp = HTTP.Response(403, [ "x-ratelimit-remaining" => "5", "x-ratelimit-reset" => future_time - ]) + ], primary_rate_limit_body) should_retry, sleep_seconds = GitHub.github_retry_decision("GET",resp, nothing, 3.0; verbose=false) @test should_retry == true @@ -332,13 +325,13 @@ end current_time = time() reset_time = string(Int(round(current_time)) + 500000000) # 500000000 seconds from now - result = GitHub.with_retries(method="GET", max_retries=1, verbose=false, sleep_fn=test_sleep) do + result = GitHub.with_retries(method="GET", max_retries=1, verbose=false, sleep_fn=test_sleep, max_sleep_seconds=2*500000000) do call_count[] += 1 if call_count[] == 1 return HTTP.Response(403, [ "x-ratelimit-remaining" => "0", "x-ratelimit-reset" => reset_time - ]) + ], primary_rate_limit_body) else return HTTP.Response(200) end @@ -347,7 +340,15 @@ end @test result.status == 200 @test call_count[] ==2 @test length(sleep_calls) == 1 - @test sleep_calls[1] >= 5.0 # Should wait at least until reset time + @test sleep_calls[1] >= 500000000 # Should wait at least until reset time + + + @test_throws RetryDelayException GitHub.with_retries(method="GET", max_retries=1, verbose=false, sleep_fn=test_sleep) do + return HTTP.Response(403, [ + "x-ratelimit-remaining" => "0", + "x-ratelimit-reset" => reset_time + ], primary_rate_limit_body) + end end @testset "Secondary rate limit with retry-after" begin From 7da83b952664bde0cae01304a08401a6c2c907a0 Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Sun, 28 Sep 2025 19:49:48 +0200 Subject: [PATCH 10/15] fix variable scope, add tests --- src/utils/requests.jl | 5 ++-- test/retries.jl | 69 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 2 deletions(-) diff --git a/src/utils/requests.jl b/src/utils/requests.jl index ea8ea4b..af90101 100644 --- a/src/utils/requests.jl +++ b/src/utils/requests.jl @@ -187,9 +187,10 @@ function github_retry_decision(method::String, resp::Union{HTTP.Response, Nothin return (true, delay_seconds) end -function wait_for_mutation_delay(; sleep_fn=sleep) +function wait_for_mutation_delay(; sleep_fn=sleep, time_fn=time) while true - now = time() + now = time_fn() + local wait_time # Checking & setting must be atomic to prevent races, so we use a lock @lock MUTATION_LOCK begin last_ts = LAST_MUTATION_TIMESTAMP[] diff --git a/test/retries.jl b/test/retries.jl index e6a3a43..9bce7d8 100644 --- a/test/retries.jl +++ b/test/retries.jl @@ -418,3 +418,72 @@ end end end + +@testset "wait_for_mutation_delay" begin + # Mock time and sleep to avoid real delays + times = [1.0, 3.0, 4.0, 5.0] + time_calls = Ref(0) + sleep_calls = Float64[] + + mock_time() = (time_calls[] += 1; times[time_calls[]]) + mock_sleep(t) = push!(sleep_calls, t) + + # Reset state for clean testing + @lock GitHub.MUTATION_LOCK begin + GitHub.LAST_MUTATION_TIMESTAMP[] = 0.0 + end + + # First call should not wait (no previous mutation) + GitHub.wait_for_mutation_delay(; sleep_fn=mock_sleep, time_fn=mock_time) + @test length(sleep_calls) == 0 + @test time_calls[] == 1 + + # Second call should wait for remaining time (1.5 + 1.0 - 3.0 = -0.5, so no wait) + GitHub.wait_for_mutation_delay(; sleep_fn=mock_sleep, time_fn=mock_time) + @test length(sleep_calls) == 0 # No sleep needed since enough time passed + + # Reset for third test - force a wait scenario + @lock GitHub.MUTATION_LOCK begin + GitHub.LAST_MUTATION_TIMESTAMP[] = 3.5 # Recent timestamp + end + time_calls[] = 2 # Start at time 3.0 + empty!(sleep_calls) + + GitHub.wait_for_mutation_delay(; sleep_fn=mock_sleep, time_fn=mock_time) + @test length(sleep_calls) == 1 + @test sleep_calls[1] ≈ 0.5 # Should wait 3.5 + 1.0 - 3.0 = 0.5 seconds +end + +@testset "with_retries mutation delay integration" begin + sleep_calls = Float64[] + mock_sleep(t) = push!(sleep_calls, t) + + # Reset mutation state + @lock GitHub.MUTATION_LOCK begin + GitHub.LAST_MUTATION_TIMESTAMP[] = 0.0 + end + + # Test: GET method should not trigger mutation delay + empty!(sleep_calls) + result = GitHub.with_retries(method="GET", max_retries=0, verbose=false, sleep_fn=mock_sleep) do + HTTP.Response(200) + end + @test result.status == 200 + @test length(sleep_calls) == 0 # No mutation delay for GET + + # Test: POST method should trigger mutation delay (but first call won't wait) + empty!(sleep_calls) + result = GitHub.with_retries(method="POST", max_retries=0, verbose=false, sleep_fn=mock_sleep) do + HTTP.Response(200) + end + @test result.status == 200 + @test length(sleep_calls) == 0 # First POST doesn't wait + + # Test: respect_mutation_delay=false should bypass delay + empty!(sleep_calls) + result = GitHub.with_retries(method="POST", max_retries=0, verbose=false, sleep_fn=mock_sleep, respect_mutation_delay=false) do + HTTP.Response(200) + end + @test result.status == 200 + @test length(sleep_calls) == 0 # Delay bypassed +end From e77a7c68d297ee0856ff26ff40434db857e8a7fd Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Wed, 1 Oct 2025 01:47:00 +0200 Subject: [PATCH 11/15] switch to separate delays per auth --- Project.toml | 4 +++ src/GitHub.jl | 2 ++ src/utils/auth.jl | 23 ++++++++++++ src/utils/requests.jl | 20 +++++------ test/auth_tests.jl | 83 ++++++++++++++++++++++++++++++++++++++++++ test/retries.jl | 84 ++++++++++++++++++++++++++++++++----------- 6 files changed, 185 insertions(+), 31 deletions(-) diff --git a/Project.toml b/Project.toml index 549869d..2e373bb 100644 --- a/Project.toml +++ b/Project.toml @@ -8,6 +8,8 @@ Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" MbedTLS = "739be429-bea8-5141-9913-cc70e7f3736d" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce" Sockets = "6462fe0b-24de-5631-8697-dd941f90decc" SodiumSeal = "2133526b-2bfb-4018-ac12-889fb3908a75" URIs = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4" @@ -17,6 +19,8 @@ HTTP = "1.10.17" # Must be >= 1.10.17, see https://github.com/JuliaWeb/GitHub.jl URIs = "1.6" # Must be >= 1.6, see https://github.com/JuliaWeb/GitHub.jl/pull/225 JSON = "0.19, 0.20, 0.21" MbedTLS = "0.6, 0.7, 1" +Random = "1.11.0" +SHA = "0.7.0, 1" SodiumSeal = "0.1" julia = "1.6" diff --git a/src/GitHub.jl b/src/GitHub.jl index d4bfcb3..2dd600d 100644 --- a/src/GitHub.jl +++ b/src/GitHub.jl @@ -2,6 +2,8 @@ module GitHub using Dates using Base64 +using Random +using SHA: sha256 ########## # import # diff --git a/src/utils/auth.jl b/src/utils/auth.jl index 976b8fa..c657cc4 100644 --- a/src/utils/auth.jl +++ b/src/utils/auth.jl @@ -97,3 +97,26 @@ function Base.show(io::IO, a::OAuth2) token_str = a.token[1:6] * repeat("*", length(a.token) - 6) print(io, "GitHub.OAuth2($token_str)") end + +########### +# Hashing # +########### + +# These are used to implement the 1s mutation delay on a per-auth basis +# See also `wait_for_mutation_delay`. We want it to be difficult to +# invert the hash to recover the token, as the hash is stored in a global dict +# in this module. +let + salt = randstring(RandomDevice(), 20) + global uint64_hash(str::AbstractString) = first(reinterpret(UInt64, sha256(string(salt, str))[1:8])) +end + +get_auth_hash(auth::OAuth2) = uint64_hash(auth.token) + +get_auth_hash(auth::UsernamePassAuth) = uint64_hash(string(auth.username, "##", auth.password)) + +get_auth_hash(::AnonymousAuth) = UInt64(0) + +# note this will give different hashes for different JWTs from the same key, +# meaning in that case the 1s mutation delay will apply per-JWT instead of per-key +get_auth_hash(auth::JWTAuth) = uint64_hash(auth.JWT) diff --git a/src/utils/requests.jl b/src/utils/requests.jl index af90101..32dd01e 100644 --- a/src/utils/requests.jl +++ b/src/utils/requests.jl @@ -15,7 +15,7 @@ end const DEFAULT_API = GitHubWebAPI(URIs.URI("https://api.github.com")) const MUTATION_LOCK = ReentrantLock() -const LAST_MUTATION_TIMESTAMP = Ref{Float64}(0.0) +const LAST_MUTATION_TIMESTAMPS = Dict{UInt64, Float64}() using Base.Meta @@ -187,16 +187,16 @@ function github_retry_decision(method::String, resp::Union{HTTP.Response, Nothin return (true, delay_seconds) end -function wait_for_mutation_delay(; sleep_fn=sleep, time_fn=time) +function wait_for_mutation_delay(auth_hash; sleep_fn=sleep, time_fn=time) while true now = time_fn() local wait_time # Checking & setting must be atomic to prevent races, so we use a lock @lock MUTATION_LOCK begin - last_ts = LAST_MUTATION_TIMESTAMP[] + last_ts = get(LAST_MUTATION_TIMESTAMPS, auth_hash, 0.0) wait_time = last_ts == 0.0 ? 0.0 : (last_ts + 1.0 - now) if wait_time <= 0 # good to go - LAST_MUTATION_TIMESTAMP[] = now + LAST_MUTATION_TIMESTAMPS[auth_hash] = now return nothing end end @@ -232,15 +232,14 @@ result = with_retries(method="GET", verbose=false) do end ``` """ -function with_retries(f; method::AbstractString="GET", max_retries::Int=5, verbose::Bool=true, sleep_fn=sleep, max_sleep_seconds::Real = 60*20, respect_mutation_delay=true) +function with_retries(f; method::AbstractString="GET", max_retries::Int=5, verbose::Bool=true, sleep_fn=sleep, max_sleep_seconds::Real = 60*20, respect_mutation_delay=true, auth_hash) backoff = Base.ExponentialBackOff(n = max_retries+1) method_upper = uppercase(method) requires_mutation_throttle = respect_mutation_delay && method_upper in ("POST", "PATCH", "PUT", "DELETE") - for (attempt, exponential_delay) in enumerate(backoff) last_try = attempt > max_retries if requires_mutation_throttle - wait_for_mutation_delay(; sleep_fn) + wait_for_mutation_delay(auth_hash; sleep_fn) end local r, ex try @@ -285,8 +284,8 @@ function github_request(api::GitHubAPI, request_method::String, endpoint; api_endpoint = api_uri(api, endpoint) _headers = convert(Dict{String, String}, headers) !haskey(_headers, "User-Agent") && (_headers["User-Agent"] = "GitHub-jl") - - r = with_retries(; method = request_method, max_retries, verbose, max_sleep_seconds, respect_mutation_delay) do + auth_hash = get_auth_hash(auth) + r = with_retries(; method = request_method, max_retries, verbose, max_sleep_seconds, respect_mutation_delay, auth_hash) do if request_method == "GET" return HTTP.request(request_method, URIs.URI(api_endpoint, query = params), _headers; redirect = allowredirects, status_exception = false, @@ -345,9 +344,10 @@ function github_paged_get(api, endpoint; page_limit = Inf, start_page = "", hand _headers = convert(Dict{String, String}, headers) !haskey(_headers, "User-Agent") && (_headers["User-Agent"] = "GitHub-jl") + auth_hash = get_auth_hash(auth) # Helper function to make a get request with retries function make_request_with_retries(url, headers) - return with_retries(; method = "GET", max_retries, verbose, max_sleep_seconds, respect_mutation_delay) do + return with_retries(; method = "GET", max_retries, verbose, max_sleep_seconds, respect_mutation_delay, auth_hash) do HTTP.request("GET", url, headers; status_exception = false, retry = false) end end diff --git a/test/auth_tests.jl b/test/auth_tests.jl index e6d07cc..107101c 100644 --- a/test/auth_tests.jl +++ b/test/auth_tests.jl @@ -24,3 +24,86 @@ auth2 = GitHub.JWTAuth(1234, key; iat = DateTime("2016-9-15T14:00")) @test_throws ArgumentError GitHub.OAuth2("ghp_\n") @test_throws ArgumentError GitHub.JWTAuth("ghp_\n") + +@testset "uint64_hash tests" begin + # Test that uint64_hash produces UInt64 values + @test typeof(GitHub.uint64_hash("test")) == UInt64 + + # Test that same input produces same output (deterministic) + @test GitHub.uint64_hash("hello") == GitHub.uint64_hash("hello") + + # Test that different inputs produce different outputs + @test GitHub.uint64_hash("hello") != GitHub.uint64_hash("world") + + # Test with empty string + @test typeof(GitHub.uint64_hash("")) == UInt64 + + # Test with special characters + @test typeof(GitHub.uint64_hash("!@#\$%^&*()")) == UInt64 + @test GitHub.uint64_hash("test!") != GitHub.uint64_hash("test") +end + +@testset "get_auth_hash tests" begin + @testset "OAuth2 hashing" begin + # Same token should produce same hash + auth1 = GitHub.OAuth2("ghp_testtoken123456") + auth2 = GitHub.OAuth2("ghp_testtoken123456") + @test GitHub.get_auth_hash(auth1) == GitHub.get_auth_hash(auth2) + @test typeof(GitHub.get_auth_hash(auth1)) == UInt64 + + # Different tokens should produce different hashes + auth3 = GitHub.OAuth2("ghp_differenttoken789") + @test GitHub.get_auth_hash(auth1) != GitHub.get_auth_hash(auth3) + end + + @testset "UsernamePassAuth hashing" begin + # Same credentials should produce same hash + auth1 = GitHub.UsernamePassAuth("user1", "pass1") + auth2 = GitHub.UsernamePassAuth("user1", "pass1") + @test GitHub.get_auth_hash(auth1) == GitHub.get_auth_hash(auth2) + @test typeof(GitHub.get_auth_hash(auth1)) == UInt64 + + # Different username should produce different hash + auth3 = GitHub.UsernamePassAuth("user2", "pass1") + @test GitHub.get_auth_hash(auth1) != GitHub.get_auth_hash(auth3) + + # Different password should produce different hash + auth4 = GitHub.UsernamePassAuth("user1", "pass2") + @test GitHub.get_auth_hash(auth1) != GitHub.get_auth_hash(auth4) + end + + @testset "AnonymousAuth hashing" begin + # AnonymousAuth should always return UInt64(0) + auth1 = GitHub.AnonymousAuth() + auth2 = GitHub.AnonymousAuth() + @test GitHub.get_auth_hash(auth1) == UInt64(0) + @test GitHub.get_auth_hash(auth2) == UInt64(0) + @test GitHub.get_auth_hash(auth1) == GitHub.get_auth_hash(auth2) + end + + @testset "JWTAuth hashing" begin + # Same JWT should produce same hash + auth1 = GitHub.JWTAuth(1234, joinpath(dirname(@__FILE__), "not_a_real_key.pem"); + iat = DateTime("2016-9-15T14:00")) + auth2 = GitHub.JWTAuth(1234, joinpath(dirname(@__FILE__), "not_a_real_key.pem"); + iat = DateTime("2016-9-15T14:00")) + @test GitHub.get_auth_hash(auth1) == GitHub.get_auth_hash(auth2) + @test typeof(GitHub.get_auth_hash(auth1)) == UInt64 + + # Different iat should produce different hash (different JWT) + auth3 = GitHub.JWTAuth(1234, joinpath(dirname(@__FILE__), "not_a_real_key.pem"); + iat = DateTime("2016-9-15T14:01")) + @test GitHub.get_auth_hash(auth1) != GitHub.get_auth_hash(auth3) + end + + @testset "Different auth types produce different hashes" begin + oauth = GitHub.OAuth2("ghp_samestring") + anon = GitHub.AnonymousAuth() + userpass = GitHub.UsernamePassAuth("user", "pass") + + # All should be different from each other + @test GitHub.get_auth_hash(oauth) != GitHub.get_auth_hash(anon) + @test GitHub.get_auth_hash(oauth) != GitHub.get_auth_hash(userpass) + @test GitHub.get_auth_hash(anon) != GitHub.get_auth_hash(userpass) + end +end diff --git a/test/retries.jl b/test/retries.jl index 9bce7d8..a0b869e 100644 --- a/test/retries.jl +++ b/test/retries.jl @@ -223,7 +223,7 @@ end push!(sleep_calls, seconds) end - result = GitHub.with_retries(method="GET", max_retries=2, verbose=false, sleep_fn=test_sleep) do + result = GitHub.with_retries(method="GET", max_retries=2, verbose=false, sleep_fn=test_sleep, auth_hash=UInt64(0)) do call_count[] += 1 if call_count[] < 3 # Return a rate limit response for first 2 attempts @@ -249,7 +249,7 @@ end end # Test with recoverable exception (for GET method) - result = GitHub.with_retries(method="GET", max_retries=2, verbose=false, sleep_fn=test_sleep) do + result = GitHub.with_retries(method="GET", max_retries=2, verbose=false, sleep_fn=test_sleep, auth_hash=UInt64(0)) do call_count[] += 1 if call_count[] < 3 throw(Base.IOError("connection refused", 111)) @@ -265,7 +265,7 @@ end @testset "Non-retryable exceptions" begin # Test that ArgumentError is not retried - @test_throws ArgumentError GitHub.with_retries(method="GET", verbose=false) do + @test_throws ArgumentError GitHub.with_retries(method="GET", verbose=false, auth_hash=UInt64(0)) do throw(ArgumentError("invalid argument")) end end @@ -279,7 +279,7 @@ end end # Test exhausting retries with rate limit responses - result = GitHub.with_retries(method="GET", max_retries=2, verbose=false, sleep_fn=test_sleep) do + result = GitHub.with_retries(method="GET", max_retries=2, verbose=false, sleep_fn=test_sleep, auth_hash=UInt64(0)) do call_count[] += 1 return HTTP.Response(429, ["retry-after" => "0.1"]) # Always return rate limit end @@ -293,7 +293,7 @@ end call_count = Ref(0) # Test exhausting retries with exceptions - @test_throws Base.IOError GitHub.with_retries(method="GET", max_retries=1, verbose=false, sleep_fn=x->nothing) do + @test_throws Base.IOError GitHub.with_retries(method="GET", max_retries=1, verbose=false, sleep_fn=x->nothing, auth_hash=UInt64(0)) do call_count[] += 1 throw(Base.IOError("persistent error", 104)) end @@ -305,7 +305,7 @@ end call_count = Ref(0) # POST requests should not retry on exceptions (non-idempotent) - @test_throws Base.IOError GitHub.with_retries(method="POST", verbose=false) do + @test_throws Base.IOError GitHub.with_retries(method="POST", verbose=false, auth_hash=UInt64(0)) do call_count[] += 1 throw(Base.IOError("connection refused", 111)) end @@ -325,7 +325,7 @@ end current_time = time() reset_time = string(Int(round(current_time)) + 500000000) # 500000000 seconds from now - result = GitHub.with_retries(method="GET", max_retries=1, verbose=false, sleep_fn=test_sleep, max_sleep_seconds=2*500000000) do + result = GitHub.with_retries(method="GET", max_retries=1, verbose=false, sleep_fn=test_sleep, max_sleep_seconds=2*500000000, auth_hash=UInt64(0)) do call_count[] += 1 if call_count[] == 1 return HTTP.Response(403, [ @@ -343,7 +343,7 @@ end @test sleep_calls[1] >= 500000000 # Should wait at least until reset time - @test_throws RetryDelayException GitHub.with_retries(method="GET", max_retries=1, verbose=false, sleep_fn=test_sleep) do + @test_throws RetryDelayException GitHub.with_retries(method="GET", max_retries=1, verbose=false, sleep_fn=test_sleep, auth_hash=UInt64(0)) do return HTTP.Response(403, [ "x-ratelimit-remaining" => "0", "x-ratelimit-reset" => reset_time @@ -361,7 +361,7 @@ end body = """{"message": "You have exceeded a secondary rate limit."}""" - result = GitHub.with_retries(method="GET", max_retries=1, verbose=false, sleep_fn=test_sleep) do + result = GitHub.with_retries(method="GET", max_retries=1, verbose=false, sleep_fn=test_sleep, auth_hash=UInt64(0)) do call_count[] += 1 if call_count[] == 1 return HTTP.Response(429, ["retry-after" => "3"]; body = Vector{UInt8}(body)) @@ -380,7 +380,7 @@ end call_count = Ref(0) # 404 should not be retried - result = GitHub.with_retries(method="GET", verbose=false) do + result = GitHub.with_retries(method="GET", verbose=false, auth_hash=UInt64(0)) do call_count[] += 1 return HTTP.Response(404) end @@ -393,7 +393,7 @@ end call_count = Ref(0) # With max_retries=0, should only try once - result = GitHub.with_retries(method="GET", max_retries=0, verbose=false) do + result = GitHub.with_retries(method="GET", max_retries=0, verbose=false, auth_hash=UInt64(0)) do call_count[] += 1 return HTTP.Response(429) # Rate limit response end @@ -409,7 +409,7 @@ end sleep_called[] = true end - result = GitHub.with_retries(method="GET", verbose=false, sleep_fn=test_sleep) do + result = GitHub.with_retries(method="GET", verbose=false, sleep_fn=test_sleep, auth_hash=UInt64(0)) do return HTTP.Response(200) end @@ -428,28 +428,31 @@ end mock_time() = (time_calls[] += 1; times[time_calls[]]) mock_sleep(t) = push!(sleep_calls, t) + # Use a test auth hash + test_auth_hash = UInt64(12345) + # Reset state for clean testing @lock GitHub.MUTATION_LOCK begin - GitHub.LAST_MUTATION_TIMESTAMP[] = 0.0 + empty!(GitHub.LAST_MUTATION_TIMESTAMPS) end # First call should not wait (no previous mutation) - GitHub.wait_for_mutation_delay(; sleep_fn=mock_sleep, time_fn=mock_time) + GitHub.wait_for_mutation_delay(test_auth_hash; sleep_fn=mock_sleep, time_fn=mock_time) @test length(sleep_calls) == 0 @test time_calls[] == 1 # Second call should wait for remaining time (1.5 + 1.0 - 3.0 = -0.5, so no wait) - GitHub.wait_for_mutation_delay(; sleep_fn=mock_sleep, time_fn=mock_time) + GitHub.wait_for_mutation_delay(test_auth_hash; sleep_fn=mock_sleep, time_fn=mock_time) @test length(sleep_calls) == 0 # No sleep needed since enough time passed # Reset for third test - force a wait scenario @lock GitHub.MUTATION_LOCK begin - GitHub.LAST_MUTATION_TIMESTAMP[] = 3.5 # Recent timestamp + GitHub.LAST_MUTATION_TIMESTAMPS[test_auth_hash] = 3.5 # Recent timestamp end time_calls[] = 2 # Start at time 3.0 empty!(sleep_calls) - GitHub.wait_for_mutation_delay(; sleep_fn=mock_sleep, time_fn=mock_time) + GitHub.wait_for_mutation_delay(test_auth_hash; sleep_fn=mock_sleep, time_fn=mock_time) @test length(sleep_calls) == 1 @test sleep_calls[1] ≈ 0.5 # Should wait 3.5 + 1.0 - 3.0 = 0.5 seconds end @@ -458,14 +461,17 @@ end sleep_calls = Float64[] mock_sleep(t) = push!(sleep_calls, t) + # Use test auth hashes + test_auth_hash = UInt64(12345) + # Reset mutation state @lock GitHub.MUTATION_LOCK begin - GitHub.LAST_MUTATION_TIMESTAMP[] = 0.0 + empty!(GitHub.LAST_MUTATION_TIMESTAMPS) end # Test: GET method should not trigger mutation delay empty!(sleep_calls) - result = GitHub.with_retries(method="GET", max_retries=0, verbose=false, sleep_fn=mock_sleep) do + result = GitHub.with_retries(method="GET", max_retries=0, verbose=false, sleep_fn=mock_sleep, auth_hash=test_auth_hash) do HTTP.Response(200) end @test result.status == 200 @@ -473,7 +479,7 @@ end # Test: POST method should trigger mutation delay (but first call won't wait) empty!(sleep_calls) - result = GitHub.with_retries(method="POST", max_retries=0, verbose=false, sleep_fn=mock_sleep) do + result = GitHub.with_retries(method="POST", max_retries=0, verbose=false, sleep_fn=mock_sleep, auth_hash=test_auth_hash) do HTTP.Response(200) end @test result.status == 200 @@ -481,9 +487,45 @@ end # Test: respect_mutation_delay=false should bypass delay empty!(sleep_calls) - result = GitHub.with_retries(method="POST", max_retries=0, verbose=false, sleep_fn=mock_sleep, respect_mutation_delay=false) do + result = GitHub.with_retries(method="POST", max_retries=0, verbose=false, sleep_fn=mock_sleep, respect_mutation_delay=false, auth_hash=test_auth_hash) do HTTP.Response(200) end @test result.status == 200 @test length(sleep_calls) == 0 # Delay bypassed end + +@testset "Per-auth mutation delay isolation" begin + # Test that different auth hashes have independent mutation delays + sleep_calls = Float64[] + mock_sleep(t) = push!(sleep_calls, t) + + # Create two different auth hashes + auth_hash_1 = UInt64(111) + auth_hash_2 = UInt64(222) + + # Reset mutation state + @lock GitHub.MUTATION_LOCK begin + empty!(GitHub.LAST_MUTATION_TIMESTAMPS) + end + + # First POST with auth_hash_1 should not wait + result1 = GitHub.with_retries(method="POST", max_retries=0, verbose=false, sleep_fn=mock_sleep, auth_hash=auth_hash_1) do + HTTP.Response(200) + end + @test result1.status == 200 + @test length(sleep_calls) == 0 + + # Immediately following POST with auth_hash_2 should also not wait (different auth) + result2 = GitHub.with_retries(method="POST", max_retries=0, verbose=false, sleep_fn=mock_sleep, auth_hash=auth_hash_2) do + HTTP.Response(200) + end + @test result2.status == 200 + @test length(sleep_calls) == 0 # Still no sleep - different auth + + # Verify both auth hashes have separate timestamps + @lock GitHub.MUTATION_LOCK begin + @test haskey(GitHub.LAST_MUTATION_TIMESTAMPS, auth_hash_1) + @test haskey(GitHub.LAST_MUTATION_TIMESTAMPS, auth_hash_2) + @test GitHub.LAST_MUTATION_TIMESTAMPS[auth_hash_1] != GitHub.LAST_MUTATION_TIMESTAMPS[auth_hash_2] + end +end From 976f6f857cff2ff3e417ed2300ea4eacd338f9ad Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Wed, 1 Oct 2025 02:01:29 +0200 Subject: [PATCH 12/15] clean up old entries --- src/utils/auth.jl | 2 ++ src/utils/requests.jl | 11 +++++++++++ test/retries.jl | 27 +++++++++++++++++++++++++++ 3 files changed, 40 insertions(+) diff --git a/src/utils/auth.jl b/src/utils/auth.jl index c657cc4..1eb0681 100644 --- a/src/utils/auth.jl +++ b/src/utils/auth.jl @@ -107,6 +107,8 @@ end # invert the hash to recover the token, as the hash is stored in a global dict # in this module. let + # this would be better as OncePerProcess to get fresh salts per session, but we support old Julia's so we'll + # have one per precompilation. salt = randstring(RandomDevice(), 20) global uint64_hash(str::AbstractString) = first(reinterpret(UInt64, sha256(string(salt, str))[1:8])) end diff --git a/src/utils/requests.jl b/src/utils/requests.jl index 32dd01e..b1741f0 100644 --- a/src/utils/requests.jl +++ b/src/utils/requests.jl @@ -16,6 +16,8 @@ const DEFAULT_API = GitHubWebAPI(URIs.URI("https://api.github.com")) const MUTATION_LOCK = ReentrantLock() const LAST_MUTATION_TIMESTAMPS = Dict{UInt64, Float64}() +const LAST_CLEARED_TIMESTAMP = Ref{Float64}(0.0) +const CLEAR_OLDER_THAN_SECONDS = 10*60 # 10 minutes using Base.Meta @@ -193,6 +195,15 @@ function wait_for_mutation_delay(auth_hash; sleep_fn=sleep, time_fn=time) local wait_time # Checking & setting must be atomic to prevent races, so we use a lock @lock MUTATION_LOCK begin + # we clear entries of `LAST_MUTATION_TIMESTAMPS` older than `CLEAR_OLDER_THAN_SECONDS` + # so the global dict doesn't grow forever. But since this could be slow, and we're holding the lock, + # we first check `LAST_CLEARED_TIMESTAMP` to see if we've cleared it recently, and skip it if we have + last_cleared = LAST_CLEARED_TIMESTAMP[] + cutoff_time = now - CLEAR_OLDER_THAN_SECONDS + if cutoff_time >= last_cleared + filter!(kv -> kv[2] >= cutoff_time, LAST_MUTATION_TIMESTAMPS) + LAST_CLEARED_TIMESTAMP[] = now + end last_ts = get(LAST_MUTATION_TIMESTAMPS, auth_hash, 0.0) wait_time = last_ts == 0.0 ? 0.0 : (last_ts + 1.0 - now) if wait_time <= 0 # good to go diff --git a/test/retries.jl b/test/retries.jl index a0b869e..e00b4da 100644 --- a/test/retries.jl +++ b/test/retries.jl @@ -529,3 +529,30 @@ end @test GitHub.LAST_MUTATION_TIMESTAMPS[auth_hash_1] != GitHub.LAST_MUTATION_TIMESTAMPS[auth_hash_2] end end + +@testset "Automatic cleanup integration" begin + times = [0.0, 1.0, 610.0] + time_calls = Ref(0) + mock_time() = (time_calls[] += 1; times[time_calls[]]) + + hash1, hash2 = UInt64(123), UInt64(456) + + @lock GitHub.MUTATION_LOCK begin + empty!(GitHub.LAST_MUTATION_TIMESTAMPS) + GitHub.LAST_CLEARED_TIMESTAMP[] = 0.0 + end + + # Add two entries at t=0.0 and t=1.0 + GitHub.wait_for_mutation_delay(hash1; sleep_fn=t->nothing, time_fn=mock_time) + GitHub.wait_for_mutation_delay(hash2; sleep_fn=t->nothing, time_fn=mock_time) + + # At t=610.0, cutoff=10 triggers cleanup; only hash2 (t=1.0) should be removed + GitHub.wait_for_mutation_delay(hash1; sleep_fn=t->nothing, time_fn=mock_time) + + @lock GitHub.MUTATION_LOCK begin + @test haskey(GitHub.LAST_MUTATION_TIMESTAMPS, hash1) + @test !haskey(GitHub.LAST_MUTATION_TIMESTAMPS, hash2) # Removed (1.0 < cutoff 10.0) + @test GitHub.LAST_MUTATION_TIMESTAMPS[hash1] == 610.0 + @test GitHub.LAST_CLEARED_TIMESTAMP[] == 610.0 + end +end From d52679f6af15afe40e1e8ef3191fa204f50f8f61 Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Wed, 1 Oct 2025 02:16:13 +0200 Subject: [PATCH 13/15] switch to reservation system --- src/utils/requests.jl | 13 ++++---- test/retries.jl | 73 ++++++++++++++++++++++++++++++++----------- 2 files changed, 62 insertions(+), 24 deletions(-) diff --git a/src/utils/requests.jl b/src/utils/requests.jl index b1741f0..97cd65f 100644 --- a/src/utils/requests.jl +++ b/src/utils/requests.jl @@ -15,7 +15,7 @@ end const DEFAULT_API = GitHubWebAPI(URIs.URI("https://api.github.com")) const MUTATION_LOCK = ReentrantLock() -const LAST_MUTATION_TIMESTAMPS = Dict{UInt64, Float64}() +const NEXT_AVAILABLE = Dict{UInt64, Float64}() # Maps auth_hash -> next available mutation time const LAST_CLEARED_TIMESTAMP = Ref{Float64}(0.0) const CLEAR_OLDER_THAN_SECONDS = 10*60 # 10 minutes @@ -195,19 +195,20 @@ function wait_for_mutation_delay(auth_hash; sleep_fn=sleep, time_fn=time) local wait_time # Checking & setting must be atomic to prevent races, so we use a lock @lock MUTATION_LOCK begin - # we clear entries of `LAST_MUTATION_TIMESTAMPS` older than `CLEAR_OLDER_THAN_SECONDS` + # we clear entries of `NEXT_AVAILABLE` older than `CLEAR_OLDER_THAN_SECONDS` # so the global dict doesn't grow forever. But since this could be slow, and we're holding the lock, # we first check `LAST_CLEARED_TIMESTAMP` to see if we've cleared it recently, and skip it if we have last_cleared = LAST_CLEARED_TIMESTAMP[] cutoff_time = now - CLEAR_OLDER_THAN_SECONDS if cutoff_time >= last_cleared - filter!(kv -> kv[2] >= cutoff_time, LAST_MUTATION_TIMESTAMPS) + filter!(kv -> kv[2] >= cutoff_time, NEXT_AVAILABLE) LAST_CLEARED_TIMESTAMP[] = now end - last_ts = get(LAST_MUTATION_TIMESTAMPS, auth_hash, 0.0) - wait_time = last_ts == 0.0 ? 0.0 : (last_ts + 1.0 - now) + next_available_time = get(NEXT_AVAILABLE, auth_hash, 0.0) + wait_time = max(0.0, next_available_time - now) if wait_time <= 0 # good to go - LAST_MUTATION_TIMESTAMPS[auth_hash] = now + # Reserve the next available slot: now + 1 second + NEXT_AVAILABLE[auth_hash] = now + 1.0 return nothing end end diff --git a/test/retries.jl b/test/retries.jl index e00b4da..1639804 100644 --- a/test/retries.jl +++ b/test/retries.jl @@ -433,28 +433,28 @@ end # Reset state for clean testing @lock GitHub.MUTATION_LOCK begin - empty!(GitHub.LAST_MUTATION_TIMESTAMPS) + empty!(GitHub.NEXT_AVAILABLE) end - # First call should not wait (no previous mutation) + # First call should not wait (no reservation exists) GitHub.wait_for_mutation_delay(test_auth_hash; sleep_fn=mock_sleep, time_fn=mock_time) @test length(sleep_calls) == 0 @test time_calls[] == 1 - # Second call should wait for remaining time (1.5 + 1.0 - 3.0 = -0.5, so no wait) + # Second call at t=3.0, next_available=2.0, so no wait needed GitHub.wait_for_mutation_delay(test_auth_hash; sleep_fn=mock_sleep, time_fn=mock_time) @test length(sleep_calls) == 0 # No sleep needed since enough time passed # Reset for third test - force a wait scenario @lock GitHub.MUTATION_LOCK begin - GitHub.LAST_MUTATION_TIMESTAMPS[test_auth_hash] = 3.5 # Recent timestamp + GitHub.NEXT_AVAILABLE[test_auth_hash] = 4.5 # Next available at 4.5 end - time_calls[] = 2 # Start at time 3.0 + time_calls[] = 2 # Start at time 4.0 empty!(sleep_calls) GitHub.wait_for_mutation_delay(test_auth_hash; sleep_fn=mock_sleep, time_fn=mock_time) @test length(sleep_calls) == 1 - @test sleep_calls[1] ≈ 0.5 # Should wait 3.5 + 1.0 - 3.0 = 0.5 seconds + @test sleep_calls[1] ≈ 0.5 # Should wait 4.5 - 4.0 = 0.5 seconds end @testset "with_retries mutation delay integration" begin @@ -466,7 +466,7 @@ end # Reset mutation state @lock GitHub.MUTATION_LOCK begin - empty!(GitHub.LAST_MUTATION_TIMESTAMPS) + empty!(GitHub.NEXT_AVAILABLE) end # Test: GET method should not trigger mutation delay @@ -505,7 +505,7 @@ end # Reset mutation state @lock GitHub.MUTATION_LOCK begin - empty!(GitHub.LAST_MUTATION_TIMESTAMPS) + empty!(GitHub.NEXT_AVAILABLE) end # First POST with auth_hash_1 should not wait @@ -522,14 +522,51 @@ end @test result2.status == 200 @test length(sleep_calls) == 0 # Still no sleep - different auth - # Verify both auth hashes have separate timestamps + # Verify both auth hashes have separate reservations @lock GitHub.MUTATION_LOCK begin - @test haskey(GitHub.LAST_MUTATION_TIMESTAMPS, auth_hash_1) - @test haskey(GitHub.LAST_MUTATION_TIMESTAMPS, auth_hash_2) - @test GitHub.LAST_MUTATION_TIMESTAMPS[auth_hash_1] != GitHub.LAST_MUTATION_TIMESTAMPS[auth_hash_2] + @test haskey(GitHub.NEXT_AVAILABLE, auth_hash_1) + @test haskey(GitHub.NEXT_AVAILABLE, auth_hash_2) + @test GitHub.NEXT_AVAILABLE[auth_hash_1] != GitHub.NEXT_AVAILABLE[auth_hash_2] end end +@testset "Reservation system prevents needless wakeups" begin + # Demonstrate how 3 threads arriving nearly simultaneously get properly serialized + # Thread 1 at t=10.0: reserves until t=11.0 + # Thread 2 at t=10.1: sees reservation at t=11.0, waits 0.9s, reserves until t=12.0 + # Thread 3 at t=10.2: sees reservation at t=12.0, waits 1.8s, reserves until t=13.0 + # Without reservations, threads 2 and 3 would compute similar short waits based on last_mutation=10.0, + # so one of them would need to wakeup, compute a new wait, and sleep again. + + test_hash = UInt64(999) + + @lock GitHub.MUTATION_LOCK begin + empty!(GitHub.NEXT_AVAILABLE) + end + + # Thread 1 at t=10.0 + times1 = [10.0] + time_calls1 = Ref(0) + GitHub.wait_for_mutation_delay(test_hash; sleep_fn=t->nothing, time_fn=()->(time_calls1[]+=1; times1[time_calls1[]])) + @test GitHub.NEXT_AVAILABLE[test_hash] == 11.0 + + # Thread 2 at t=10.1 + sleep_calls2 = Float64[] + times2 = [10.1, 11.0] + time_calls2 = Ref(0) + GitHub.wait_for_mutation_delay(test_hash; sleep_fn=t->push!(sleep_calls2,t), time_fn=()->(time_calls2[]+=1; times2[time_calls2[]])) + @test sleep_calls2[1] ≈ 0.9 # Waits until t=11.0 + @test GitHub.NEXT_AVAILABLE[test_hash] == 12.0 + + # Thread 3 at t=10.2 + sleep_calls3 = Float64[] + times3 = [10.2, 12.0] + time_calls3 = Ref(0) + GitHub.wait_for_mutation_delay(test_hash; sleep_fn=t->push!(sleep_calls3,t), time_fn=()->(time_calls3[]+=1; times3[time_calls3[]])) + @test sleep_calls3[1] ≈ 1.8 # Waits until t=12.0 (NOT just 0.9s from last_mutation) + @test GitHub.NEXT_AVAILABLE[test_hash] == 13.0 +end + @testset "Automatic cleanup integration" begin times = [0.0, 1.0, 610.0] time_calls = Ref(0) @@ -538,21 +575,21 @@ end hash1, hash2 = UInt64(123), UInt64(456) @lock GitHub.MUTATION_LOCK begin - empty!(GitHub.LAST_MUTATION_TIMESTAMPS) + empty!(GitHub.NEXT_AVAILABLE) GitHub.LAST_CLEARED_TIMESTAMP[] = 0.0 end - # Add two entries at t=0.0 and t=1.0 + # Add two reservations: hash1->1.0, hash2->2.0 GitHub.wait_for_mutation_delay(hash1; sleep_fn=t->nothing, time_fn=mock_time) GitHub.wait_for_mutation_delay(hash2; sleep_fn=t->nothing, time_fn=mock_time) - # At t=610.0, cutoff=10 triggers cleanup; only hash2 (t=1.0) should be removed + # At t=610.0, cutoff=10 triggers cleanup; hash2 (next_available=2.0) should be removed GitHub.wait_for_mutation_delay(hash1; sleep_fn=t->nothing, time_fn=mock_time) @lock GitHub.MUTATION_LOCK begin - @test haskey(GitHub.LAST_MUTATION_TIMESTAMPS, hash1) - @test !haskey(GitHub.LAST_MUTATION_TIMESTAMPS, hash2) # Removed (1.0 < cutoff 10.0) - @test GitHub.LAST_MUTATION_TIMESTAMPS[hash1] == 610.0 + @test haskey(GitHub.NEXT_AVAILABLE, hash1) + @test !haskey(GitHub.NEXT_AVAILABLE, hash2) # Removed (2.0 < cutoff 10.0) + @test GitHub.NEXT_AVAILABLE[hash1] == 611.0 # Reserved at t=610.0, so next = 611.0 @test GitHub.LAST_CLEARED_TIMESTAMP[] == 610.0 end end From 5bef101ecdf67f66d4280dbb2963c2076ae186e5 Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Wed, 1 Oct 2025 02:17:18 +0200 Subject: [PATCH 14/15] relax compat --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 2e373bb..dda2dff 100644 --- a/Project.toml +++ b/Project.toml @@ -19,7 +19,7 @@ HTTP = "1.10.17" # Must be >= 1.10.17, see https://github.com/JuliaWeb/GitHub.jl URIs = "1.6" # Must be >= 1.6, see https://github.com/JuliaWeb/GitHub.jl/pull/225 JSON = "0.19, 0.20, 0.21" MbedTLS = "0.6, 0.7, 1" -Random = "1.11.0" +Random = "1" SHA = "0.7.0, 1" SodiumSeal = "0.1" julia = "1.6" From c5e180a4f2425311b948272fb5037acfca9a439b Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Wed, 1 Oct 2025 02:33:42 +0200 Subject: [PATCH 15/15] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3fb2721..89f8ab1 100644 --- a/README.md +++ b/README.md @@ -74,7 +74,7 @@ These methods all accept keyword arguments which control how the request is made - `max_retries::Int=5`: how many retries to attempt in requesting the resources. Retries are only made for idempotent requests ("GET", "HEAD", "OPTIONS", "TRACE", "PUT", "DELETE") and delays respect GitHub [rate limit headers](https://docs.github.com/en/rest/using-the-rest-api/rate-limits-for-the-rest-api?apiVersion=2022-11-28#checking-the-status-of-your-rate-limit). - `verbose::Bool=true`: whether or not to log retries as Info level logs - `max_sleep_seconds::Real=60*20`: if GitHub.jl intends to sleep for longer than `max_sleep_seconds` before retrying, e.g. due to rate limit headers from GitHub, throws an `RetryDelayException` instead. -- `respect_mutation_delay::Bool=true`: whether or not to ensure at least 1s has passed since the last mutation has been submitted to the GitHub API from GitHub.jl, following GitHub's [recommended practice](https://docs.github.com/en/rest/using-the-rest-api/best-practices-for-using-the-rest-api?apiVersion=2022-11-28#pause-between-mutative-requests). +- `respect_mutation_delay::Bool=true`: whether or not to ensure at least 1s has passed since the last mutation has been submitted to the GitHub API (tracked separately per auth object), following GitHub's [recommended practice](https://docs.github.com/en/rest/using-the-rest-api/best-practices-for-using-the-rest-api?apiVersion=2022-11-28#pause-between-mutative-requests). #### Users and Organizations