Skip to content

Commit

Permalink
Merge pull request #15923 from MikeMcQuaid/brew_contributions_improve…
Browse files Browse the repository at this point in the history
…ments

dev-cmd/contributions: usability/performance improvements.
  • Loading branch information
MikeMcQuaid committed Aug 30, 2023
2 parents 340b982 + d357607 commit 59cc141
Show file tree
Hide file tree
Showing 8 changed files with 121 additions and 110 deletions.
130 changes: 63 additions & 67 deletions Library/Homebrew/dev-cmd/contributions.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,28 +13,32 @@ module Homebrew
OFFICIAL_CMD_TAPS.keys.map { |t| t.delete_prefix("homebrew/") },
OFFICIAL_CASK_TAPS.reject { |t| t == "cask" },
].flatten.freeze
MAX_REPO_COMMITS = 1000

sig { returns(CLI::Parser) }
def contributions_args
Homebrew::CLI::Parser.new do
usage_banner "`contributions` [--user=<email|username>] [<--repositories>`=`] [<--csv>]"
description <<~EOS
Contributions to Homebrew repos.
Contributions to Homebrew repositories.
EOS

comma_array "--repositories",
description: "Specify a comma-separated (no spaces) list of repositories to search. " \
description: "Specify a comma-separated list of repositories to search. " \
"Supported repositories: #{SUPPORTED_REPOS.map { |t| "`#{t}`" }.to_sentence}. " \
"Omitting this flag, or specifying `--repositories=all`, searches all repositories. " \
"Use `--repositories=primary` to search only the main repositories: brew,core,cask."
"Omitting this flag, or specifying `--repositories=primary`, searches only the " \
"main repositories: brew,core,cask. " \
"Specifying `--repositories=all`, searches all repositories. "
flag "--from=",
description: "Date (ISO-8601 format) to start searching contributions."
description: "Date (ISO-8601 format) to start searching contributions. " \
"Omitting this flag searches the last year."

flag "--to=",
description: "Date (ISO-8601 format) to stop searching contributions."

flag "--user=",
description: "A GitHub username or email address of a specific person to find contribution data for."
comma_array "--user=",
description: "Specify a comma-separated list of GitHub usernames or email addresses to find " \
"contributions from. Omitting this flag searches maintainers."

switch "--csv",
description: "Print a CSV of contributions across repositories over the time period."
Expand All @@ -48,41 +52,47 @@ def contributions
results = {}
grand_totals = {}

all_repos = args.repositories.nil? || args.repositories.include?("all")
repos = if all_repos
SUPPORTED_REPOS
elsif args.repositories.include?("primary")
repos = if args.repositories.blank? || args.repositories.include?("primary")
PRIMARY_REPOS
elsif args.repositories.include?("all")
SUPPORTED_REPOS
else
args.repositories
end

if args.user
user = args.user
results[user] = scan_repositories(repos, user, args)
grand_totals[user] = total(results[user])
from = args.from.presence || Date.today.prev_year.iso8601

puts "#{user} contributed #{Utils.pluralize("time", grand_totals[user].values.sum,
include_count: true)} #{time_period(args)}."
puts generate_csv(T.must(user), results[user], grand_totals[user]) if args.csv?
return
end
contribution_types = [:author, :committer, :coauthorship, :review]

maintainers = GitHub.members_by_team("Homebrew", "maintainers")
maintainers.each do |username, _|
users = args.user.presence || GitHub.members_by_team("Homebrew", "maintainers")
users.each do |username, _|
# TODO: Using the GitHub username to scan the `git log` undercounts some
# contributions as people might not always have configured their Git
# committer details to match the ones on GitHub.
# TODO: Switch to using the GitHub APIs instead of `git log` if
# they ever support trailers.
results[username] = scan_repositories(repos, username, args)
results[username] = scan_repositories(repos, username, args, from: from)
grand_totals[username] = total(results[username])

puts "#{username} contributed #{Utils.pluralize("time", grand_totals[username].values.sum,
include_count: true)} #{time_period(args)}."
contributions = contribution_types.map do |type|
type_count = grand_totals[username][type]
next if type_count.to_i.zero?

"#{Utils.pluralize("time", type_count, include_count: true)} (#{type})"
end.compact
contributions << "#{Utils.pluralize("time", grand_totals[username].values.sum, include_count: true)} (total)"

puts [
"#{username} contributed",
*contributions.to_sentence,
"#{time_period(from: from, to: args.to)}.",
].join(" ")
end

puts generate_maintainers_csv(grand_totals) if args.csv?
return unless args.csv?

puts
puts generate_csv(grand_totals)
end

sig { params(repo: String).returns(Pathname) }
Expand All @@ -92,63 +102,44 @@ def find_repo_path_for_repo(repo)
Tap.fetch("homebrew", repo).path
end

sig { params(args: Homebrew::CLI::Args).returns(String) }
def time_period(args)
if args.from && args.to
"between #{args.from} and #{args.to}"
elsif args.from
"after #{args.from}"
elsif args.to
"before #{args.to}"
sig { params(from: T.nilable(String), to: T.nilable(String)).returns(String) }
def time_period(from:, to:)
if from && to
"between #{from} and #{to}"
elsif from
"after #{from}"
elsif to
"before #{to}"
else
"in all time"
end
end

sig { params(totals: Hash).returns(String) }
def generate_maintainers_csv(totals)
def generate_csv(totals)
CSV.generate do |csv|
csv << %w[user repo author committer coauthorships reviews total]
csv << %w[user repo author committer coauthorship review total]

totals.sort_by { |_, v| -v.values.sum }.each do |user, total|
csv << grand_total_row(user, total)
end
end
end

sig { params(user: String, results: Hash, grand_total: Hash).returns(String) }
def generate_csv(user, results, grand_total)
CSV.generate do |csv|
csv << %w[user repo author committer coauthorships reviews total]
results.each do |repo, counts|
csv << [
user,
repo,
counts[:author],
counts[:committer],
counts[:coauthorships],
counts[:reviews],
counts.values.sum,
]
end
csv << grand_total_row(user, grand_total)
end
end

sig { params(user: String, grand_total: Hash).returns(Array) }
def grand_total_row(user, grand_total)
[
user,
"all",
grand_total[:author],
grand_total[:committer],
grand_total[:coauthorships],
grand_total[:reviews],
grand_total[:coauthorship],
grand_total[:review],
grand_total.values.sum,
]
end

def scan_repositories(repos, person, args)
def scan_repositories(repos, person, args, from:)
data = {}

repos.each do |repo|
Expand All @@ -171,11 +162,13 @@ def scan_repositories(repos, person, args)

puts "Determining contributions for #{person} on #{repo_full_name}..." if args.verbose?

author_commits, committer_commits = GitHub.count_repo_commits(repo_full_name, person, args,
max: MAX_REPO_COMMITS)
data[repo] = {
author: GitHub.count_repo_commits(repo_full_name, person, "author", args),
committer: GitHub.count_repo_commits(repo_full_name, person, "committer", args),
coauthorships: git_log_trailers_cmd(T.must(repo_path), person, "Co-authored-by", args),
reviews: count_reviews(repo_full_name, person, args),
author: author_commits,
committer: committer_commits,
coauthorship: git_log_trailers_cmd(T.must(repo_path), person, "Co-authored-by", from: from, to: args.to),
review: count_reviews(repo_full_name, person, args),
}
end

Expand All @@ -184,7 +177,7 @@ def scan_repositories(repos, person, args)

sig { params(results: Hash).returns(Hash) }
def total(results)
totals = { author: 0, committer: 0, coauthorships: 0, reviews: 0 }
totals = { author: 0, committer: 0, coauthorship: 0, review: 0 }

results.each_value do |counts|
counts.each do |kind, count|
Expand All @@ -195,12 +188,15 @@ def total(results)
totals
end

sig { params(repo_path: Pathname, person: String, trailer: String, args: Homebrew::CLI::Args).returns(Integer) }
def git_log_trailers_cmd(repo_path, person, trailer, args)
sig {
params(repo_path: Pathname, person: String, trailer: String, from: T.nilable(String),
to: T.nilable(String)).returns(Integer)
}
def git_log_trailers_cmd(repo_path, person, trailer, from:, to:)
cmd = ["git", "-C", repo_path, "log", "--oneline"]
cmd << "--format='%(trailers:key=#{trailer}:)'"
cmd << "--before=#{args.to}" if args.to
cmd << "--after=#{args.from}" if args.from
cmd << "--before=#{to}" if to
cmd << "--after=#{from}" if from

Utils.safe_popen_read(*cmd).lines.count { |l| l.include?(person) }
end
Expand Down
33 changes: 16 additions & 17 deletions Library/Homebrew/test/utils/github_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -90,49 +90,48 @@

it "counts commits authored by a user" do
allow(described_class).to receive(:repo_commits_for_user)
.with("homebrew/cask", "user1", "author", {}).and_return(five_shas)
.with("homebrew/cask", "user1", "author", {}, nil).and_return(five_shas)
allow(described_class).to receive(:repo_commits_for_user)
.with("homebrew/cask", "user1", "committer", {}, nil).and_return([])

expect(described_class.count_repo_commits("homebrew/cask", "user1", "author", {})).to eq(5)
expect(described_class.count_repo_commits("homebrew/cask", "user1", {})).to eq([5, 0])
end

it "counts commits committed by a user" do
allow(described_class).to receive(:repo_commits_for_user)
.with("homebrew/core", "user1", "author", {}).and_return([])
.with("homebrew/core", "user1", "author", {}, nil).and_return([])
allow(described_class).to receive(:repo_commits_for_user)
.with("homebrew/core", "user1", "committer", {}).and_return(five_shas)
.with("homebrew/core", "user1", "committer", {}, nil).and_return(five_shas)

expect(described_class.count_repo_commits("homebrew/core", "user1", "committer", {})).to eq(5)
expect(described_class.count_repo_commits("homebrew/core", "user1", {})).to eq([0, 5])
end

it "calculates correctly when authored > committed with different shas" do
allow(described_class).to receive(:repo_commits_for_user)
.with("homebrew/cask", "user1", "author", {}).and_return(ten_shas)
.with("homebrew/cask", "user1", "author", {}, nil).and_return(ten_shas)
allow(described_class).to receive(:repo_commits_for_user)
.with("homebrew/cask", "user1", "committer", {}).and_return(%w[1 2 3 4 5])
.with("homebrew/cask", "user1", "committer", {}, nil).and_return(%w[1 2 3 4 5])

expect(described_class.count_repo_commits("homebrew/cask", "user1", "author", {})).to eq(10)
expect(described_class.count_repo_commits("homebrew/cask", "user1", "committer", {})).to eq(5)
expect(described_class.count_repo_commits("homebrew/cask", "user1", {})).to eq([10, 5])
end

it "calculates correctly when committed > authored" do
allow(described_class).to receive(:repo_commits_for_user)
.with("homebrew/cask", "user1", "author", {}).and_return(five_shas)
.with("homebrew/cask", "user1", "author", {}, nil).and_return(five_shas)
allow(described_class).to receive(:repo_commits_for_user)
.with("homebrew/cask", "user1", "committer", {}).and_return(ten_shas)
.with("homebrew/cask", "user1", "committer", {}, nil).and_return(ten_shas)

expect(described_class.count_repo_commits("homebrew/cask", "user1", "author", {})).to eq(5)
expect(described_class.count_repo_commits("homebrew/cask", "user1", "committer", {})).to eq(5)
expect(described_class.count_repo_commits("homebrew/cask", "user1", {})).to eq([5, 5])
end

it "deduplicates commits authored and committed by the same user" do
allow(described_class).to receive(:repo_commits_for_user)
.with("homebrew/core", "user1", "author", {}).and_return(five_shas)
.with("homebrew/core", "user1", "author", {}, nil).and_return(five_shas)
allow(described_class).to receive(:repo_commits_for_user)
.with("homebrew/core", "user1", "committer", {}).and_return(five_shas)
.with("homebrew/core", "user1", "committer", {}, nil).and_return(five_shas)

# Because user1 authored and committed the same 5 commits.
expect(described_class.count_repo_commits("homebrew/core", "user1", "author", {})).to eq(5)
expect(described_class.count_repo_commits("homebrew/core", "user1", "committer", {})).to eq(0)
expect(described_class.count_repo_commits("homebrew/core", "user1", {})).to eq([5, 0])
end
end
end
23 changes: 14 additions & 9 deletions Library/Homebrew/utils/github.rb
Original file line number Diff line number Diff line change
Expand Up @@ -758,7 +758,7 @@ def self.multiple_short_commits_exist?(user, repo, commit)
output[/^Status: (200)/, 1] != "200"
end

def self.repo_commits_for_user(nwo, user, filter, args)
def self.repo_commits_for_user(nwo, user, filter, args, max)
return if Homebrew::EnvConfig.no_github_api?

params = ["#{filter}=#{user}"]
Expand All @@ -768,20 +768,25 @@ def self.repo_commits_for_user(nwo, user, filter, args)
commits = []
API.paginate_rest("#{API_URL}/repos/#{nwo}/commits", additional_query_params: params.join("&")) do |result|
commits.concat(result.map { |c| c["sha"] })
if max.present? && commits.length >= max
opoo "#{user} exceeded #{max} #{nwo} commits as #{filter}, stopped counting!"
break
end
end
commits
end

def self.count_repo_commits(nwo, user, filter, args)
return if Homebrew::EnvConfig.no_github_api?

author_shas = repo_commits_for_user(nwo, user, "author", args)
return author_shas.count if filter == "author"
def self.count_repo_commits(nwo, user, args, max: nil)
odie "Cannot count commits, HOMEBREW_NO_GITHUB_API set!" if Homebrew::EnvConfig.no_github_api?

committer_shas = repo_commits_for_user(nwo, user, "committer", args)
return 0 if committer_shas.empty?
author_shas = repo_commits_for_user(nwo, user, "author", args, max)
committer_shas = repo_commits_for_user(nwo, user, "committer", args, max)
return [0, 0] if author_shas.blank? && committer_shas.blank?

author_count = author_shas.count
# Only count commits where the author and committer are different.
committer_shas.difference(author_shas).count
committer_count = committer_shas.difference(author_shas).count

[author_count, committer_count]
end
end
13 changes: 12 additions & 1 deletion Library/Homebrew/utils/github/api.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ def self.pat_blurb(scopes = ALL_SCOPES)
API_URL = "https://api.github.com"
API_MAX_PAGES = 50
API_MAX_ITEMS = 5000
PAGINATE_RETRY_COUNT = 3

CREATE_GIST_SCOPES = ["gist"].freeze
CREATE_ISSUE_FORK_OR_PR_SCOPES = ["repo"].freeze
Expand Down Expand Up @@ -281,7 +282,17 @@ def self.open_rest(

def self.paginate_rest(url, additional_query_params: nil, per_page: 100)
(1..API_MAX_PAGES).each do |page|
result = API.open_rest("#{url}?per_page=#{per_page}&page=#{page}&#{additional_query_params}")
retry_count = 1
result = begin
API.open_rest("#{url}?per_page=#{per_page}&page=#{page}&#{additional_query_params}")
rescue Error
if retry_count < PAGINATE_RETRY_COUNT
retry_count += 1
retry
end

raise
end
break if result.blank?

yield(result, page)
Expand Down
8 changes: 4 additions & 4 deletions completions/fish/brew.fish
Original file line number Diff line number Diff line change
Expand Up @@ -528,15 +528,15 @@ __fish_brew_complete_arg 'config' -l quiet -d 'Make some output more quiet'
__fish_brew_complete_arg 'config' -l verbose -d 'Make some output more verbose'


__fish_brew_complete_cmd 'contributions' 'Contributions to Homebrew repos'
__fish_brew_complete_cmd 'contributions' 'Contributions to Homebrew repositories'
__fish_brew_complete_arg 'contributions' -l csv -d 'Print a CSV of contributions across repositories over the time period'
__fish_brew_complete_arg 'contributions' -l debug -d 'Display any debugging information'
__fish_brew_complete_arg 'contributions' -l from -d 'Date (ISO-8601 format) to start searching contributions'
__fish_brew_complete_arg 'contributions' -l from -d 'Date (ISO-8601 format) to start searching contributions. Omitting this flag searches the last year'
__fish_brew_complete_arg 'contributions' -l help -d 'Show this message'
__fish_brew_complete_arg 'contributions' -l quiet -d 'Make some output more quiet'
__fish_brew_complete_arg 'contributions' -l repositories -d 'Specify a comma-separated (no spaces) list of repositories to search. Supported repositories: `brew`, `core`, `cask`, `aliases`, `autoupdate`, `bundle`, `command-not-found`, `test-bot`, `services`, `cask-fonts` and `cask-versions`. Omitting this flag, or specifying `--repositories=all`, searches all repositories. Use `--repositories=primary` to search only the main repositories: brew,core,cask'
__fish_brew_complete_arg 'contributions' -l repositories -d 'Specify a comma-separated list of repositories to search. Supported repositories: `brew`, `core`, `cask`, `aliases`, `autoupdate`, `bundle`, `command-not-found`, `test-bot`, `services`, `cask-fonts` and `cask-versions`. Omitting this flag, or specifying `--repositories=primary`, searches only the main repositories: brew,core,cask. Specifying `--repositories=all`, searches all repositories. '
__fish_brew_complete_arg 'contributions' -l to -d 'Date (ISO-8601 format) to stop searching contributions'
__fish_brew_complete_arg 'contributions' -l user -d 'A GitHub username or email address of a specific person to find contribution data for'
__fish_brew_complete_arg 'contributions' -l user -d 'Specify a comma-separated list of GitHub usernames or email addresses to find contributions from. Omitting this flag searches maintainers'
__fish_brew_complete_arg 'contributions' -l verbose -d 'Make some output more verbose'


Expand Down

0 comments on commit 59cc141

Please sign in to comment.