Skip to content

Commit

Permalink
url_protected_by_*: Check multiple headers
Browse files Browse the repository at this point in the history
Before `#parse_curl_output` was introduced and related methods were
updated to use it, `#url_protected_by_cloudflare?` and
`#url_protected_by_incapsula?` were checking a string of all the
headers from a response and using a regex to check related header
values.

However, when `#curl_http_content_headers_and_checksum` was updated
to use `#parse_curl_output` internally, the `:headers` value became
a hash generated by `#parse_curl_response`. The `#url_protected_by_*`
methods were updated to work with the hash value but this wasn't able
to fully replicate the previous behavior because
`#parse_curl_response` was only keeping the last instance of a given
header (maintaining pre-existing behavior). This is an issue for
these methods because they check `Set-Cookie` headers and there can
be multiple instances of this header in a response.

This commit updates these methods to handle an array of strings in
addition to the existing string support. This change ensures that
these methods properly check all `Set-Cookie` headers, effectively
reinstating the previous behavior.

Past that, this updates one of the early return values in
`#url_protected_by_cloudflare?` to be `false` instead of an implicit
`nil`. After adding a type signature to this method, it became clear
that it wasn't always returning a boolean value and this fixes it.
  • Loading branch information
samford committed May 6, 2022
1 parent 94449d0 commit 40b8fd3
Show file tree
Hide file tree
Showing 2 changed files with 173 additions and 4 deletions.
149 changes: 149 additions & 0 deletions Library/Homebrew/test/utils/curl_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,115 @@
require "utils/curl"

describe "Utils::Curl" do
let(:details) {
  # Fixture responses shaped like the hash the `url_protected_by_*` methods
  # read (`:status` and `:headers`), grouped by the kind of server that
  # produced them.

  # Deep-copies a fixture so that a variant can be modified without touching
  # the fixture it was derived from.
  deep_copy = ->(fixture) { Marshal.load(Marshal.dump(fixture)) }

  # Copies a fixture and replaces its `set-cookie` header with `cookies`
  # (either a single string or an array of strings).
  with_cookies = lambda do |fixture, cookies|
    deep_copy.call(fixture).tap { |copy| copy[:headers]["set-cookie"] = cookies }
  end

  normal = {}
  cloudflare = {}
  incapsula = {}

  # Plain 403 response without any `set-cookie` header; most other fixtures
  # are derived from this one.
  normal[:no_cookie] = {
    url:            "https://www.example.com/",
    final_url:      nil,
    status:         "403",
    headers:        {
      "age"            => "123456",
      "cache-control"  => "max-age=604800",
      "content-type"   => "text/html; charset=UTF-8",
      "date"           => "Wed, 1 Jan 2020 01:23:45 GMT",
      "etag"           => "\"3147526947+ident\"",
      "expires"        => "Wed, 31 Jan 2020 01:23:45 GMT",
      "last-modified"  => "Wed, 1 Jan 2020 00:00:00 GMT",
      "server"         => "ECS (dcb/7EA2)",
      "vary"           => "Accept-Encoding",
      "x-cache"        => "HIT",
      "content-length" => "3",
    },
    etag:           "3147526947+ident",
    content_length: "3",
    file:           "...",
    file_hash:      nil,
  }

  normal[:ok] = deep_copy.call(normal[:no_cookie]).tap { |copy| copy[:status] = "200" }

  normal[:single_cookie] = with_cookies.call(normal[:no_cookie], "a_cookie=for_testing")

  normal[:multiple_cookies] = with_cookies.call(
    normal[:no_cookie],
    [
      "first_cookie=for_testing",
      "last_cookie=also_for_testing",
    ],
  )

  normal[:blank_headers] = deep_copy.call(normal[:no_cookie]).tap { |copy| copy[:headers] = {} }

  # 403 response carrying both a Cloudflare `server` header and a Cloudflare
  # cookie.
  cloudflare[:single_cookie] = {
    url:            "https://www.example.com/",
    final_url:      nil,
    status:         "403",
    headers:        {
      "date"            => "Wed, 1 Jan 2020 01:23:45 GMT",
      "content-type"    => "text/plain; charset=UTF-8",
      "content-length"  => "16",
      "x-frame-options" => "SAMEORIGIN",
      "referrer-policy" => "same-origin",
      "cache-control"   => "private, max-age=0, no-store, no-cache, must-revalidate, post-check=0, pre-check=0",
      "expires"         => "Thu, 01 Jan 1970 00:00:01 GMT",
      "expect-ct"       => "max-age=604800, report-uri=\"https://report-uri.cloudflare.com/cdn-cgi/beacon/expect-ct\"",
      "set-cookie"      => "__cf_bm=0123456789abcdef; path=/; expires=Wed, 31-Jan-20 01:23:45 GMT;" \
                           " domain=www.example.com; HttpOnly; Secure; SameSite=None",
      "server"          => "cloudflare",
      "cf-ray"          => "0123456789abcdef-IAD",
      "alt-svc"         => "h3=\":443\"; ma=86400, h3-29=\":443\"; ma=86400",
    },
    etag:           nil,
    content_length: "16",
    file:           "error code: 1020",
    file_hash:      nil,
  }

  # The Cloudflare cookie sits between unrelated cookies so that checking
  # only the first or last `set-cookie` value would miss it.
  cloudflare[:multiple_cookies] = with_cookies.call(
    cloudflare[:single_cookie],
    [
      "first_cookie=for_testing",
      "__cf_bm=abcdef0123456789; path=/; expires=Thu, 28-Apr-22 18:38:40 GMT; domain=www.example.com; HttpOnly;" \
      " Secure; SameSite=None",
      "last_cookie=also_for_testing",
    ],
  )

  cloudflare[:no_server] = deep_copy.call(cloudflare[:single_cookie]).tap do |copy|
    copy[:headers].delete("server")
  end

  cloudflare[:wrong_server] = deep_copy.call(cloudflare[:single_cookie]).tap do |copy|
    copy[:headers]["server"] = "nginx 1.2.3"
  end

  # TODO: Make the Incapsula test data more realistic once we can find an
  # example website to reference.
  incapsula[:single_cookie_visid_incap] = with_cookies.call(normal[:no_cookie], "visid_incap_something=something")

  incapsula[:single_cookie_incap_ses] = with_cookies.call(normal[:no_cookie], "incap_ses_something=something")

  incapsula[:multiple_cookies_visid_incap] = with_cookies.call(
    normal[:no_cookie],
    [
      "first_cookie=for_testing",
      "visid_incap_something=something",
      "last_cookie=also_for_testing",
    ],
  )

  incapsula[:multiple_cookies_incap_ses] = with_cookies.call(
    normal[:no_cookie],
    [
      "first_cookie=for_testing",
      "incap_ses_something=something",
      "last_cookie=also_for_testing",
    ],
  )

  {
    normal:     normal,
    cloudflare: cloudflare,
    incapsula:  incapsula,
  }
}

let(:location_urls) {
%w[
https://example.com/example/
Expand Down Expand Up @@ -294,6 +403,46 @@
end
end

describe "url_protected_by_cloudflare?" do
  # Each example walks a list of fixtures from `details` and expects the same
  # boolean result for every one of them.
  it "returns `true` when a URL is protected by Cloudflare" do
    %i[single_cookie multiple_cookies].each do |scenario|
      expect(url_protected_by_cloudflare?(details[:cloudflare][scenario])).to be(true)
    end
  end

  it "returns `false` when a URL is not protected by Cloudflare" do
    # Cloudflare cookie present but the `server` header is missing or wrong.
    %i[no_server wrong_server].each do |scenario|
      expect(url_protected_by_cloudflare?(details[:cloudflare][scenario])).to be(false)
    end

    # Responses without any Cloudflare-specific headers.
    %i[no_cookie ok single_cookie multiple_cookies].each do |scenario|
      expect(url_protected_by_cloudflare?(details[:normal][scenario])).to be(false)
    end
  end

  it "returns `false` when response headers are blank" do
    blank_response = details[:normal][:blank_headers]

    expect(url_protected_by_cloudflare?(blank_response)).to be(false)
  end
end

describe "url_protected_by_incapsula?" do
  # Covers both Incapsula cookie prefixes (`visid_incap_` and `incap_ses_`),
  # as a single `set-cookie` string and as one entry in an array of cookies.
  it "returns `true` when a URL is protected by Incapsula" do
    expect(url_protected_by_incapsula?(details[:incapsula][:single_cookie_visid_incap])).to be(true)
    expect(url_protected_by_incapsula?(details[:incapsula][:single_cookie_incap_ses])).to be(true)
    expect(url_protected_by_incapsula?(details[:incapsula][:multiple_cookies_visid_incap])).to be(true)
    expect(url_protected_by_incapsula?(details[:incapsula][:multiple_cookies_incap_ses])).to be(true)
  end

  it "returns `false` when a URL is not protected by Incapsula" do
    expect(url_protected_by_incapsula?(details[:normal][:no_cookie])).to be(false)
    expect(url_protected_by_incapsula?(details[:normal][:ok])).to be(false)
    expect(url_protected_by_incapsula?(details[:normal][:single_cookie])).to be(false)
    expect(url_protected_by_incapsula?(details[:normal][:multiple_cookies])).to be(false)
  end

  it "returns `false` when response headers are blank" do
    expect(url_protected_by_incapsula?(details[:normal][:blank_headers])).to be(false)
  end
end

describe "#parse_curl_output" do
it "returns a correct hash when curl output contains response(s) and body" do
expect(parse_curl_output("#{response_text[:ok]}#{body[:default]}"))
Expand Down
28 changes: 24 additions & 4 deletions Library/Homebrew/utils/curl.rb
Original file line number Diff line number Diff line change
Expand Up @@ -198,20 +198,40 @@ def curl_output(*args, **options)
end

# Check if a URL is protected by Cloudflare (e.g. badlion.net and jaxx.io).
# @param details [Hash] Response information from
# `#curl_http_content_headers_and_checksum`.
# @return [true, false] Whether a response contains headers indicating that
# the URL is protected by Cloudflare.
sig { params(details: T::Hash[Symbol, T.untyped]).returns(T::Boolean) }
def url_protected_by_cloudflare?(details)
  # Always returns a boolean: `false` when the headers are blank or the
  # status is neither 403 nor 503, otherwise whether both a Cloudflare cookie
  # and a Cloudflare `server` header are present.
  return false if details[:headers].blank?
  return false unless [403, 503].include?(details[:status].to_i)

  # A header value may be a single string or an array of strings (a response
  # can contain several `Set-Cookie` headers), so normalize with `Array()`
  # and check every value.
  set_cookie_header = Array(details[:headers]["set-cookie"])
  has_cloudflare_cookie_header = set_cookie_header.compact.any? do |cookie|
    cookie.match?(/^(__cfduid|__cf_bm)=/i)
  end

  server_header = Array(details[:headers]["server"])
  has_cloudflare_server = server_header.compact.any? do |server|
    server.match?(/^cloudflare/i)
  end

  has_cloudflare_cookie_header && has_cloudflare_server
end

# Check if a URL is protected by Incapsula (e.g. corsair.com).
# @param details [Hash] Response information from
# `#curl_http_content_headers_and_checksum`.
# @return [true, false] Whether a response contains headers indicating that
# the URL is protected by Incapsula.
sig { params(details: T::Hash[Symbol, T.untyped]).returns(T::Boolean) }
def url_protected_by_incapsula?(details)
  # Returns `false` when the headers are blank or the status is not 403,
  # otherwise whether any `set-cookie` value starts with an Incapsula cookie
  # prefix (`visid_incap_` or `incap_ses_`).
  return false if details[:headers].blank?
  return false if details[:status].to_i != 403

  # The `set-cookie` value may be a single string or an array of strings, so
  # normalize with `Array()` and check every cookie.
  set_cookie_header = Array(details[:headers]["set-cookie"])
  set_cookie_header.compact.any? { |cookie| cookie.match?(/^(visid_incap|incap_ses)_/i) }
end

def curl_check_http_content(url, url_type, specs: {}, user_agents: [:default],
Expand Down

0 comments on commit 40b8fd3

Please sign in to comment.