Skip to content

Commit

Permalink
Strategy: Use Curl output parsing methods
Browse files (browse the repository at this point in the history)
  • Loading branch information
samford committed Apr 19, 2022
1 parent 73deeb4 commit 9e37a03
Showing 1 changed file with 9 additions and 37 deletions.
46 changes: 9 additions & 37 deletions Library/Homebrew/livecheck/strategy.rb
Original file line number | Diff line number | Diff line change
Expand Up @@ -72,11 +72,6 @@ module Strategy
retries: 0,
}.freeze

# HTTP response head(s) and body are typically separated by a double
# `CRLF` (whereas HTTP header lines are separated by a single `CRLF`).
# In rare cases, this can also be a double newline (`\n\n`).
HTTP_HEAD_BODY_SEPARATOR = "\r\n\r\n"

# A regex used to identify a tarball extension at the end of a string.
TARBALL_EXTENSION_REGEX = /
\.t
Expand Down Expand Up @@ -180,22 +175,17 @@ def self.page_headers(url, homebrew_curl: false)
headers = []

[:default, :browser].each do |user_agent|
stdout, _, status = curl_with_workarounds(
output, _, status = curl_with_workarounds(
*PAGE_HEADERS_CURL_ARGS, url,
**DEFAULT_CURL_OPTIONS,
use_homebrew_curl: homebrew_curl,
user_agent: user_agent
)
next unless status.success?

while stdout.match?(/\AHTTP.*\r$/)
h, stdout = stdout.split("\r\n\r\n", 2)

headers << h.split("\r\n").drop(1)
.to_h { |header| header.split(/:\s*/, 2) }
.transform_keys(&:downcase)
end

return headers if status.success?
parsed_output = parse_curl_output(output)
parsed_output[:responses].each { |response| headers << response[:headers] }
return headers if headers.present?
end

headers
Expand All @@ -211,8 +201,6 @@ def self.page_headers(url, homebrew_curl: false)
# @return [Hash]
sig { params(url: String, homebrew_curl: T::Boolean).returns(T::Hash[Symbol, T.untyped]) }
def self.page_content(url, homebrew_curl: false)
original_url = url

stderr = nil
[:default, :browser].each do |user_agent|
stdout, stderr, status = curl_with_workarounds(
Expand All @@ -229,27 +217,11 @@ def self.page_content(url, homebrew_curl: false)

# Separate the head(s)/body and identify the final URL (after any
# redirections)
max_iterations = 5
iterations = 0
output = output.lstrip
while output.match?(%r{\AHTTP/[\d.]+ \d+}) && output.include?(HTTP_HEAD_BODY_SEPARATOR)
iterations += 1
raise "Too many redirects (max = #{max_iterations})" if iterations > max_iterations

head_text, _, output = output.partition(HTTP_HEAD_BODY_SEPARATOR)
output = output.lstrip

location = head_text[/^Location:\s*(.*)$/i, 1]
next if location.blank?

location.chomp!
# Convert a relative redirect URL to an absolute URL
location = URI.join(url, location) unless location.match?(PageMatch::URL_MATCH_REGEX)
final_url = location
end
parsed_output = parse_curl_output(output)
final_url = curl_response_last_location(parsed_output[:responses], absolutize: true, base_url: url)

data = { content: output }
data[:final_url] = final_url if final_url.present? && final_url != original_url
data = { content: parsed_output[:body] }
data[:final_url] = final_url if final_url.present? && final_url != url
return data
end

Expand Down

0 comments on commit 9e37a03

Please sign in to comment.