Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: use shared checks for URL cop #17521

Merged
merged 1 commit into from
Jun 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions Library/Homebrew/rubocops/cask/url.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# typed: true
# frozen_string_literal: true

require "rubocops/shared/url_helper"

module RuboCop
module Cop
module Cask
Expand All @@ -21,12 +23,16 @@ class Url < Base
extend AutoCorrector
extend Forwardable
include OnUrlStanza
include UrlHelper

def on_url_stanza(stanza)
return if stanza.stanza_node.block_type?

url_stanza = stanza.stanza_node.first_argument
hash_node = stanza.stanza_node.last_argument

audit_url(:cask, [stanza.stanza_node], [], livecheck_url: false)

return unless hash_node.hash_type?

hash_node.each_pair do |key_node, value_node|
Expand Down
268 changes: 268 additions & 0 deletions Library/Homebrew/rubocops/shared/url_helper.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,268 @@
# typed: true
# frozen_string_literal: true

require "rubocops/shared/helper_functions"

module RuboCop
module Cop
# This module performs common checks on the `url` and `mirror` fields in both formulae and casks.
module UrlHelper
include HelperFunctions

# Walks the given nodes in order and yields once for every node whose URL
# string matches `regex`. Before yielding, the parent of the matched string
# node is registered through `offending_node`.
#
# The block receives the match object, the URL text and the node's index
# within `urls`. When `@type` is `:cask` the URL text is the node's full
# `source`; otherwise it is the string content of the first parameter.
#
# @param urls [Array] url/mirror method call nodes
# @param regex [Regexp] pattern to match URLs
def audit_urls(urls, regex)
  urls.each_with_index do |node, position|
    string_node, url_text =
      if @type == :cask
        [node.first_argument, node.source]
      else
        first_param = parameters(node).first
        [first_param, string_content(first_param)]
      end

    matched = regex_match_group(string_node, regex)
    if matched
      offending_node(string_node.parent)

      yield matched, url_text, position
    end
  end
end

# Runs the shared URL style checks over the given nodes and reports every
# violation through `problem`.
#
# @param type [Symbol] kind of package being audited; stored in `@type` for
#   `audit_urls`, and `:formula` additionally enables the branch-tarball check
# @param urls [Array] primary url method call nodes
# @param mirrors [Array] mirror method call nodes (may be empty)
# @param livecheck_url [String, false] a URL equal to this value is exempt
#   from the Apache closer check and the SourceForge geolocation check
def audit_url(type, urls, mirrors, livecheck_url: false)
  @type = type

  # GNU URLs; doesn't apply to mirrors
  gnu_pattern = %r{^(?:https?|ftp)://ftpmirror\.gnu\.org/(.*)}
  audit_urls(urls, gnu_pattern) do |match, url|
    problem "Please use \"https://ftp.gnu.org/gnu/#{match[1]}\" instead of #{url}."
  end

  # Fossies upstream requests they aren't used as primary URLs
  # https://github.com/Homebrew/homebrew-core/issues/14486#issuecomment-307753234
  fossies_pattern = %r{^https?://fossies\.org/}
  audit_urls(urls, fossies_pattern) do
    problem "Please don't use fossies.org in the url (using as a mirror is fine)"
  end

  apache_pattern = %r{^https?://(?:[^/]*\.)?apache\.org/(?:dyn/closer\.cgi\?path=/?|dist/)(.*)}i
  audit_urls(urls, apache_pattern) do |match, url|
    next if url == livecheck_url

    problem "#{url} should be `https://www.apache.org/dyn/closer.lua?path=#{match[1]}`"
  end

  version_control_pattern = %r{^(cvs|bzr|hg|fossil)://}
  audit_urls(urls, version_control_pattern) do |match, _|
    problem "Use of the #{match[1]}:// scheme is deprecated, pass `:using => :#{match[1]}` instead"
  end

  svn_pattern = %r{^svn\+http://}
  audit_urls(urls, svn_pattern) do |_, _|
    problem "Use of the svn+http:// scheme is deprecated, pass `:using => :svn` instead"
  end

  # A mirror that repeats a primary URL adds nothing.
  audit_urls(mirrors, /.*/) do |_, mirror|
    urls.each do |url|
      url_string = string_content(parameters(url).first)
      next unless url_string.eql?(mirror)

      problem "URL should not be duplicated as a mirror: #{url_string}"
    end
  end

  # From here on the checks apply to primary URLs and mirrors alike.
  # `+=` rebinds the local only; the caller's array is not mutated.
  urls += mirrors

  # Check a variety of SSL/TLS URLs that don't consistently auto-redirect
  # or are overly common errors that need to be reduced & fixed over time.
  http_to_https_patterns = Regexp.union([%r{^http://ftp\.gnu\.org/},
                                         %r{^http://ftpmirror\.gnu\.org/},
                                         %r{^http://download\.savannah\.gnu\.org/},
                                         %r{^http://download-mirror\.savannah\.gnu\.org/},
                                         %r{^http://(?:[^/]*\.)?apache\.org/},
                                         %r{^http://code\.google\.com/},
                                         %r{^http://fossies\.org/},
                                         %r{^http://mirrors\.kernel\.org/},
                                         %r{^http://mirrors\.ocf\.berkeley\.edu/},
                                         %r{^http://(?:[^/]*\.)?bintray\.com/},
                                         %r{^http://tools\.ietf\.org/},
                                         %r{^http://launchpad\.net/},
                                         %r{^http://github\.com/},
                                         %r{^http://bitbucket\.org/},
                                         %r{^http://anonscm\.debian\.org/},
                                         %r{^http://cpan\.metacpan\.org/},
                                         %r{^http://hackage\.haskell\.org/},
                                         %r{^http://(?:[^/]*\.)?archive\.org},
                                         %r{^http://(?:[^/]*\.)?freedesktop\.org},
                                         %r{^http://(?:[^/]*\.)?mirrorservice\.org/}])
  audit_urls(urls, http_to_https_patterns) do |_, url, index|
    # It's fine to have a plain HTTP mirror further down the mirror list.
    # Swap the scheme by substitution rather than by position: for casks the
    # yielded string is the node source, which does not start with "http".
    https_url = url.sub("http://", "https://")
    # Escape the URL so that characters such as `+`, `?` or `(` are matched
    # literally instead of being interpreted as regex syntax.
    https_pattern = Regexp.new(Regexp.escape(https_url))
    https_index = T.let(nil, T.nilable(Integer))
    audit_urls(urls, https_pattern) do |_, _, found_https_index|
      https_index = found_https_index
    end
    problem "Please use https:// for #{url}" if !https_index || https_index > index
  end

  apache_mirror_pattern = %r{^https?://(?:[^/]*\.)?apache\.org/dyn/closer\.(?:cgi|lua)\?path=/?(.*)}i
  audit_urls(mirrors, apache_mirror_pattern) do |match, mirror|
    problem "Please use `https://archive.apache.org/dist/#{match[1]}` as a mirror instead of #{mirror}."
  end

  # NOTE(review): `search.mcpan.org` looks like a typo for `search.cpan.org` - confirm before changing.
  cpan_pattern = %r{^http://search\.mcpan\.org/CPAN/(.*)}i
  audit_urls(urls, cpan_pattern) do |match, url|
    problem "#{url} should be `https://cpan.metacpan.org/#{match[1]}`"
  end

  gnome_pattern = %r{^(http|ftp)://ftp\.gnome\.org/pub/gnome/(.*)}i
  audit_urls(urls, gnome_pattern) do |match, url|
    problem "#{url} should be `https://download.gnome.org/#{match[2]}`"
  end

  debian_pattern = %r{^git://anonscm\.debian\.org/users/(.*)}i
  audit_urls(urls, debian_pattern) do |match, url|
    problem "#{url} should be `https://anonscm.debian.org/git/users/#{match[1]}`"
  end

  # Prefer HTTP/S when possible over FTP protocol due to possible firewalls.
  mirror_service_pattern = %r{^ftp://ftp\.mirrorservice\.org}
  audit_urls(urls, mirror_service_pattern) do |_, url|
    problem "Please use https:// for #{url}"
  end

  cpan_ftp_pattern = %r{^ftp://ftp\.cpan\.org/pub/CPAN(.*)}i
  audit_urls(urls, cpan_ftp_pattern) do |match_obj, url|
    problem "#{url} should be `http://search.cpan.org/CPAN#{match_obj[1]}`"
  end

  # SourceForge url patterns
  sourceforge_patterns = %r{^https?://.*\b(sourceforge|sf)\.(com|net)}
  audit_urls(urls, sourceforge_patterns) do |_, url|
    # Skip if the URL looks like a SVN repository.
    next if url.include? "/svnroot/"
    next if url.include? "svn.sourceforge"
    next if url.include? "/p/"

    if url =~ /(\?|&)use_mirror=/
      problem "Don't use #{Regexp.last_match(1)}use_mirror in SourceForge urls (url is #{url})."
    end

    problem "Don't use /download in SourceForge urls (url is #{url})." if url.end_with?("/download")

    if url.match?(%r{^https?://sourceforge\.}) && url != livecheck_url
      problem "Use https://downloads.sourceforge.net to get geolocation (url is #{url})."
    end

    if url.match?(%r{^https?://prdownloads\.})
      problem "Don't use prdownloads in SourceForge urls (url is #{url})."
    end

    if url.match?(%r{^http://\w+\.dl\.})
      problem "Don't use specific dl mirrors in SourceForge urls (url is #{url})."
    end

    problem "Please use https:// for #{url}" if url.start_with? "http://downloads"
  end

  # Debian has an abundance of secure mirrors. Let's not pluck the insecure
  # one out of the grab bag.
  unsecure_deb_pattern = %r{^http://http\.debian\.net/debian/(.*)}i
  audit_urls(urls, unsecure_deb_pattern) do |match, _|
    problem <<~EOS
      Please use a secure mirror for Debian URLs.
      We recommend:
      https://deb.debian.org/debian/#{match[1]}
    EOS
  end

  # Check to use canonical URLs for Debian packages
  noncanon_deb_pattern =
    Regexp.union([%r{^https://mirrors\.kernel\.org/debian/},
                  %r{^https://mirrors\.ocf\.berkeley\.edu/debian/},
                  %r{^https://(?:[^/]*\.)?mirrorservice\.org/sites/ftp\.debian\.org/debian/}])
  audit_urls(urls, noncanon_deb_pattern) do |_, url|
    problem "Please use https://deb.debian.org/debian/ for #{url}"
  end

  # Check for new-url Google Code download URLs, https:// is preferred
  google_code_pattern = Regexp.union([%r{^http://[A-Za-z0-9\-.]*\.googlecode\.com/files.*},
                                      %r{^http://code\.google\.com/}])
  audit_urls(urls, google_code_pattern) do |_, url|
    problem "Please use https:// for #{url}"
  end

  # Check for `git://` GitHub repository URLs, https:// is preferred.
  git_gh_pattern = %r{^git://[^/]*github\.com/}
  audit_urls(urls, git_gh_pattern) do |_, url|
    problem "Please use https:// for #{url}"
  end

  # Check for `git://` Gitorious repository URLs, https:// is preferred.
  git_gitorious_pattern = %r{^git://[^/]*gitorious\.org/}
  audit_urls(urls, git_gitorious_pattern) do |_, url|
    problem "Please use https:// for #{url}"
  end

  # Check for `http://` GitHub repository URLs, https:// is preferred.
  gh_pattern = %r{^http://github\.com/.*\.git$}
  audit_urls(urls, gh_pattern) do |_, url|
    problem "Please use https:// for #{url}"
  end

  # Check for master branch GitHub archives.
  if type == :formula
    tarball_gh_pattern = %r{^https://github\.com/.*archive/master\.(tar\.gz|zip)$}
    audit_urls(urls, tarball_gh_pattern) do
      problem "Use versioned rather than branch tarballs for stable checksums."
    end
  end

  # Use new-style archive downloads.
  archive_gh_pattern = %r{https://.*github.*/(?:tar|zip)ball/}
  audit_urls(urls, archive_gh_pattern) do |_, url|
    next if url.end_with?(".git")

    problem "Use /archive/ URLs for GitHub tarballs (url is #{url})."
  end

  # Group 1 sits inside the negative lookahead, so the reference name is group 2.
  archive_refs_gh_pattern = %r{https://.*github.+/archive/(?![a-fA-F0-9]{40})(?!refs/(tags|heads)/)(.*)\.tar\.gz$}
  audit_urls(urls, archive_refs_gh_pattern) do |match, url|
    next if url.end_with?(".git")

    problem "Use refs/tags/#{match[2]} or refs/heads/#{match[2]} for GitHub references (url is #{url})."
  end

  # Don't use GitHub .zip files
  zip_gh_pattern = %r{https://.*github.*/(archive|releases)/.*\.zip$}
  audit_urls(urls, zip_gh_pattern) do |_, url|
    next if url.match? %r{raw.githubusercontent.com/.*/.*/(main|master|HEAD)/}
    next if url.include?("releases/download")
    next if url.include?("desktop.githubusercontent.com/github-desktop/releases/")

    problem "Use GitHub tarballs rather than zipballs (url is #{url})."
  end

  # Don't use GitHub codeload URLs
  codeload_gh_pattern = %r{https?://codeload\.github\.com/(.+)/(.+)/(?:tar\.gz|zip)/(.+)}
  audit_urls(urls, codeload_gh_pattern) do |match, url|
    problem <<~EOS
      Use GitHub archive URLs:
      https://github.com/#{match[1]}/#{match[2]}/archive/#{match[3]}.tar.gz
      Rather than codeload:
      #{url}
    EOS
  end

  # Check for Maven Central URLs, prefer HTTPS redirector over specific host
  maven_pattern = %r{https?://(?:central|repo\d+)\.maven\.org/maven2/(.+)$}
  audit_urls(urls, maven_pattern) do |match, url|
    problem "#{url} should be `https://search.maven.org/remotecontent?filepath=#{match[1]}`"
  end
end
end
end
end
Loading