From aab523b29e5223d3fbbd7b3f195064140fd54295 Mon Sep 17 00:00:00 2001 From: NewMountain Date: Fri, 5 Nov 2021 20:40:42 -0700 Subject: [PATCH 1/5] Update tests (note: contribution and churn remained the same) Updated internal calculations to track both lines added and removed while matching the original contribution and churn calculations Additionally, file change counts were manually reviewed to ensure their correctness. --- test_gitcodechurn.py | 389 +++++++++++++++++++++---------------------- 1 file changed, 188 insertions(+), 201 deletions(-) diff --git a/test_gitcodechurn.py b/test_gitcodechurn.py index 881c752..20af215 100644 --- a/test_gitcodechurn.py +++ b/test_gitcodechurn.py @@ -216,7 +216,7 @@ "+if __name__ == '__main__':", "+ main()", ], - "files": {"gitcodechurn.py": {0: 0, 1: 190}}, + "files": {"gitcodechurn.py": {0: {"lines_added": 0, "lines_removed": 0}, 1: {"lines_added": 190, "lines_removed": 0}}}, "contribution": 190, "churn": 0, }, @@ -251,7 +251,7 @@ "+Version: 0.1", "+", ], - "files": {"gitcodechurn.py": {1: 0, 2: 4}}, + "files": {"gitcodechurn.py": {1: {"lines_added": 0, "lines_removed": 0}, 2: {"lines_added": 4, "lines_removed": 0}}}, "contribution": 4, "churn": 0, }, @@ -269,7 +269,7 @@ "-*Reference: https://blog.gitprime.com/why-code-churn-matters/*", "+*Reference: https://www.pluralsight.com/blog/teams/why-code-churn-matters*", ], - "files": {"README.md": {8: 0}}, + "files": {"README.md": {8: {"lines_removed": 0, "lines_added": 0}}}, "contribution": 0, "churn": 0, }, @@ -287,7 +287,7 @@ "-Solutions that I've found online looked at changes to files irrespective whether these are new changes or edits to existing files. Hence this solution that segments code edits (churn) with new code changes (contribution).", "+Solutions that I've found online looked at changes to files irrespective whether these are new changes or edits to existing lines of code within existing files. 
Hence this solution that segments line-of-code edits (churn) with new code changes (contribution).", ], - "files": {"README.md": {10: 0}}, + "files": {"README.md": {10: {"lines_added": 0, "lines_removed": 0}}}, "contribution": 0, "churn": 0, }, @@ -305,7 +305,7 @@ '-A Python script to compute "true" code churn of a Git repository. Especially useful for software teams.', '+A Python script to compute "true" code churn of a Git repository. Useful for software teams to openly help manage technical debt.', ], - "files": {"README.md": {2: 0}}, + "files": {"README.md": {2: {"lines_added": 0, "lines_removed": 0}}}, "contribution": 0, "churn": 0, }, @@ -399,38 +399,34 @@ ], "files": { "gitcodechurn.py": { - 37: 2, - 40: 1, - 42: 1, - 45: 1, - 47: 1, - 50: 1, - 52: 1, - 55: 1, - # It appears the churn is on line 57 - # as it is an add and then delete - # Ideally, this data structure would - # show line 57 was both deleted and extended - 57: 9, - 62: 1, - 64: 1, - 66: 2, - 72: 1, - 74: 10, - 80: 1, - 83: 1, - 85: 0, - 87: 1, - 89: 0, - 99: 2, - 102: 0, - 105: 2, - 108: 1, - 111: 3, - 167: 1, - 191: 1, - 192: 0, - 217: 14, + 37: {'lines_added': 2, 'lines_removed': 0}, + 40: {'lines_added': 0, 'lines_removed': 1}, + 42: {'lines_added': 1, 'lines_removed': 0}, + 45: {'lines_added': 0, 'lines_removed': 1}, + 47: {'lines_added': 1, 'lines_removed': 0}, + 50: {'lines_added': 0, 'lines_removed': 1}, + 52: {'lines_added': 1, 'lines_removed': 0}, + 55: {'lines_added': 0, 'lines_removed': 1}, + 57: {'lines_added': 8, 'lines_removed': 1}, + 62: {'lines_added': 0, 'lines_removed': 1}, + 64: {'lines_added': 0, 'lines_removed': 1}, + 66: {'lines_added': 2, 'lines_removed': 0}, + 72: {'lines_added': 1, 'lines_removed': 0}, + 74: {'lines_added': 10, 'lines_removed': 0}, + 80: {'lines_added': 0, 'lines_removed': 1}, + 83: {'lines_added': 0, 'lines_removed': 1}, + 85: {'lines_added': 0, 'lines_removed': 0}, + 87: {'lines_added': 0, 'lines_removed': 1}, + 89: {'lines_added': 0, 'lines_removed': 
0}, + 99: {'lines_added': 2, 'lines_removed': 0}, + 102: {'lines_added': 0, 'lines_removed': 0}, + 105: {'lines_added': 2, 'lines_removed': 0}, + 108: {'lines_added': 1, 'lines_removed': 0}, + 111: {'lines_added': 3, 'lines_removed': 0}, + 167: {'lines_added': 0, 'lines_removed': 1}, + 191: {'lines_added': 1, 'lines_removed': 0}, + 192: {'lines_added': 0, 'lines_removed': 0}, + 217: {'lines_added': 14, 'lines_removed': 0}, } }, "churn": 1, @@ -450,7 +446,7 @@ "- usage = 'python [*/]gitcodechurn.py before=YYY-MM-DD after=YYYY-MM-DD dir=[*/]path [-exdir=[*/]path] [-h]',", '+ usage = \'python [*/]gitcodechurn.py before="YYYY-MM-DD" after="YYYY-MM-DD" author="flacle" dir="[*/]path" [-exdir="[*/]path]" [-h]\',', ], - "files": {"gitcodechurn.py": {38: 0}}, + "files": {"gitcodechurn.py": {38: {'lines_added': 0, 'lines_removed': 0}}}, "churn": 0, "contribution": 0, }, @@ -468,7 +464,7 @@ '- usage = \'python [*/]gitcodechurn.py before="YYYY-MM-DD" after="YYYY-MM-DD" author="flacle" dir="[*/]path" [-exdir="[*/]path]" [-h]\',', '+ usage = \'python [*/]gitcodechurn.py before="YYYY-MM-DD" after="YYYY-MM-DD" author="flacle" dir="[*/]path" [-exdir="[*/]path"] [-h]\',', ], - "files": {"gitcodechurn.py": {38: 0}}, + "files": {"gitcodechurn.py": {38: {'lines_added': 0, 'lines_removed': 0}}}, "contribution": 0, "churn": 0, }, @@ -551,33 +547,33 @@ ], "files": { "README.md": { - 11: 0, - 12: 2, - 16: 1, - 18: 1, - 20: 1, - 21: 11, - 22: 1, - 24: 2, - 33: 1, - 35: 1, - 37: 3, + 11: {'lines_added': 0, 'lines_removed': 0}, + 12: {'lines_added': 2, 'lines_removed': 0}, + 16: {'lines_added': 0, 'lines_removed': 1}, + 18: {'lines_added': 1, 'lines_removed': 0}, + 20: {'lines_added': 0, 'lines_removed': 1}, + 21: {'lines_added': 11, 'lines_removed': 0}, + 22: {'lines_added': 0, 'lines_removed': 1}, + 24: {'lines_added': 0, 'lines_removed': 2}, + 33: {'lines_added': 1, 'lines_removed': 0}, + 35: {'lines_added': 1, 'lines_removed': 0}, + 37: {'lines_added': 3, 'lines_removed': 0}, }, 
"gitcodechurn.py": { - 38: 0, - 42: 0, - 44: 0, - 47: 0, - 49: 0, - 70: 0, - 71: 1, - 72: 1, - 79: 0, - 80: 1, - 81: 1, - 103: 1, - 223: 1, - 224: 1, + 38: {'lines_added': 0, 'lines_removed': 0}, + 42: {'lines_added': 0, 'lines_removed': 0}, + 44: {'lines_added': 0, 'lines_removed': 0}, + 47: {'lines_added': 0, 'lines_removed': 0}, + 49: {'lines_added': 0, 'lines_removed': 0}, + 70: {'lines_added': 0, 'lines_removed': 0}, + 71: {'lines_added': 0, 'lines_removed': 1}, + 72: {'lines_added': 1, 'lines_removed': 0}, + 79: {'lines_added': 0, 'lines_removed': 0}, + 80: {'lines_added': 0, 'lines_removed': 1}, + 81: {'lines_added': 1, 'lines_removed': 0}, + 103: {'lines_added': 1, 'lines_removed': 0}, + 223: {'lines_added': 0, 'lines_removed': 1}, + 224: {'lines_added': 1, 'lines_removed': 0}, }, }, "contribution": 31, @@ -642,16 +638,16 @@ "+", ], "files": { - "README.md": {15: 0}, + "README.md": {15: {'lines_added': 0, 'lines_removed': 0}}, "gitcodechurn.py": { - 4: 0, - 16: 0, - 172: 0, - 173: 2, - 192: 2, - 193: 0, - 197: 0, - 198: 25, + 4: {'lines_added': 0, 'lines_removed': 0}, + 16: {'lines_added': 0, 'lines_removed': 0}, + 172: {'lines_added': 0, 'lines_removed': 0}, + 173: {'lines_added': 0, 'lines_removed': 2}, + 192: {'lines_added': 2, 'lines_removed': 0}, + 193: {'lines_added': 0, 'lines_removed': 0}, + 197: {'lines_added': 0, 'lines_removed': 0}, + 198: {'lines_added': 25, 'lines_removed': 0}, }, }, "contribution": 29, @@ -757,23 +753,29 @@ "+ command = 'git log --author=\"'+author+'\" --format=\"'+format+'\" --no-abbrev '", ], "files": { - "README.md": {15: 0, 24: 0, 31: 0, 35: 0, 41: 12}, + "README.md": { + 15: {'lines_added': 0, 'lines_removed': 0}, + 24: {'lines_added': 0, 'lines_removed': 0}, + 31: {'lines_added': 0, 'lines_removed': 0}, + 35: {'lines_added': 0, 'lines_removed': 0}, + 41: {'lines_added': 12, 'lines_removed': 0} + }, "gitcodechurn.py": { - 11: -1, - 15: 6, - 16: 5, - 44: 0, - 49: 0, - 54: 0, - 60: 0, - 67: 0, - 103: 6, - 142: 2, - 148: 3, 
- 189: 1, - 192: 3, - 196: 2, - 200: 1, + 11: {'lines_added': 0, 'lines_removed': 1}, + 15: {'lines_added': 6, 'lines_removed': 0}, + 16: {'lines_added': 0, 'lines_removed': 5}, + 44: {'lines_added': 0, 'lines_removed': 0}, + 49: {'lines_added': 0, 'lines_removed': 0}, + 54: {'lines_added': 0, 'lines_removed': 0}, + 60: {'lines_added': 0, 'lines_removed': 0}, + 67: {'lines_added': 0, 'lines_removed': 0}, + 103: {'lines_added': 6, 'lines_removed': 0}, + 142: {'lines_added': 0, 'lines_removed': 2}, + 148: {'lines_added': 3, 'lines_removed': 0}, + 189: {'lines_added': 0, 'lines_removed': 1}, + 192: {'lines_added': 0, 'lines_removed': 3}, + 196: {'lines_added': 2, 'lines_removed': 0}, + 200: {'lines_added': 1, 'lines_removed': 0}, }, }, "contribution": 42, @@ -887,45 +889,30 @@ def test_is_loc_change(): def test_get_loc_change(): """Given a result from `is_loc_change`, extract the count of lines changed. - The logic here is the left side is lines removed - the right side is lines added. - - If no number is given after the comma, it is assumed to be 1 + The function will return a tuple of two line-change tuples + The schema of each line-change tuple is (line_number, lines_modified) + So "-11,0 +12,2" would become ((11, 0), (12, 2)) """ tests = [ - ("-11,0 +12,2", {11: 0, 12: 2}), - ("-16 +18", {16: 1, 18: 1}), - ("-18,0 +21,11", {18: 0, 21: 11}), - ("-22 +35", {22: 1, 35: 1}), - ("-24,2 +37,3", {24: 2, 37: 3}), - ( - "-38 +38", - { - 38: 0, - }, - ), - ( - "-42 +42", - { - 42: 0, - }, - ), - ("-71 +70,0", {71: 1, 70: 0}), - ("-72,0 +72", {72: 1}), - # According to the spec, - # it returns a dictionary where left are removals and right are additions - # There's a bug here as dictionaries are non-ordered structures - # Note how the "left" negative appears to the right in the dictionary result - ("-80 +79,0", {79: 0, 80: 1}), - ("-81,0 +81", {81: 1}), - ("-103,2 +103,3", {103: 1}), - ("-223 +224", {223: 1, 224: 1}), - ("-103 +103,7", {103: 6}), - ("-142,2 +148,3",
{142: 2, 148: 3}), - ("-189 +196,2", {189: 1, 196: 2}), - ("-192,3 +200", {192: 3, 200: 1}), - ("-0,0 +1,190", {0: 0, 1: 190}), - ("-1,0 +2,4", {1: 0, 2: 4}), + ("-11,0 +12,2", ((11,0), (12, 2))), + ("-16 +18", ((16,1), (18, 1))), + ("-18,0 +21,11", ((18,0),(21,11))), + ("-22 +35", ((22,1), (35,1))), + ("-24,2 +37,3", ((24,2),(37,3))), + ("-38 +38", ((38,1), (38,1))), + ("-42 +42", ((42,1),(42,1))), + ("-71 +70,0", ((71,1),(70,0))), + ("-72,0 +72", ((72,0),(72,1))), + ("-80 +79,0", ((80,1),(79,0))), + ("-81,0 +81", ((81,0),(81,1))), + ("-103,2 +103,3", ((103,2),(103,3))), + ("-223 +224", ((223,1),(224,1))), + ("-103 +103,7", ((103,1),(103,7))), + ("-142,2 +148,3", ((142,2),(148,3))), + ("-189 +196,2", ((189,1),(196,2))), + ("-192,3 +200", ((192,3),(200,1))), + ("-0,0 +1,190", ((0,0),(1,190))), + ("-1,0 +2,4", ((1,0), (2,4))), ] for (line, expected) in tests: assert get_loc_change(line) == expected @@ -973,82 +960,82 @@ def test_calculate_statistics(mocker): assert actual_files == { "README.md": { - 2: 0, - 8: 0, - 10: 0, - 11: 0, - 12: 2, - 15: 0, - 16: 1, - 18: 1, - 20: 1, - 21: 11, - 22: 1, - 24: 2, - 31: 0, - 33: 1, - 35: 1, - 37: 3, - 41: 12, + 2: {'lines_added': 0, 'lines_removed': 0}, + 8: {'lines_added': 0, 'lines_removed': 0}, + 10: {'lines_added': 0, 'lines_removed': 0}, + 11: {'lines_added': 0, 'lines_removed': 0}, + 12: {'lines_added': 2, 'lines_removed': 0}, + 15: {'lines_added': 0, 'lines_removed': 0}, + 16: {'lines_added': 0, 'lines_removed': 1}, + 18: {'lines_added': 1, 'lines_removed': 0}, + 20: {'lines_added': 0, 'lines_removed': 1}, + 21: {'lines_added': 11, 'lines_removed': 0}, + 22: {'lines_added': 0, 'lines_removed': 1}, + 24: {'lines_added': 0, 'lines_removed': 2}, + 31: {'lines_added': 0, 'lines_removed': 0}, + 33: {'lines_added': 1, 'lines_removed': 0}, + 35: {'lines_added': 1, 'lines_removed': 0}, + 37: {'lines_added': 3, 'lines_removed': 0}, + 41: {'lines_added': 12, 'lines_removed': 0}, }, "gitcodechurn.py": { - 0: 0, - 1: 190, - 2: 4, - 
4: 0, - 11: -1, - 15: 6, - 16: 5, - 37: 2, - 38: 0, - 40: 1, - 42: 1, - 44: 0, - 45: 1, - 47: 1, - 49: 0, - 50: 1, - 52: 1, - 54: 0, - 55: 1, - 57: 9, - 60: 0, - 62: 1, - 64: 1, - 66: 2, - 67: 0, - 70: 0, - 71: 1, - 72: 2, - 74: 10, - 79: 0, - 80: 2, - 81: 1, - 83: 1, - 85: 0, - 87: 1, - 89: 0, - 99: 2, - 102: 0, - 103: 7, - 105: 2, - 108: 1, - 111: 3, - 142: 2, - 148: 3, - 167: 1, - 172: 0, - 173: 2, - 189: 1, - 191: 1, - 192: 5, - 193: 0, - 196: 2, - 197: 0, - 198: 25, - 200: 1, - 217: 14, - 223: 1, - 224: 1, + 0: {'lines_added': 0, 'lines_removed': 0}, + 1: {'lines_added': 190, 'lines_removed': 0}, + 2: {'lines_added': 4, 'lines_removed': 0}, + 4: {'lines_added': 0, 'lines_removed': 0}, + 11: {'lines_added': 0, 'lines_removed': 1}, + 15: {'lines_added': 6, 'lines_removed': 0}, + 16: {'lines_added': 0, 'lines_removed': 5}, + 37: {'lines_added': 2, 'lines_removed': 0}, + 38: {'lines_added': 0, 'lines_removed': 0}, + 40: {'lines_added': 0, 'lines_removed': 1}, + 42: {'lines_added': 1, 'lines_removed': 0}, + 44: {'lines_added': 0, 'lines_removed': 0}, + 45: {'lines_added': 0, 'lines_removed': 1}, + 47: {'lines_added': 1, 'lines_removed': 0}, + 49: {'lines_added': 0, 'lines_removed': 0}, + 50: {'lines_added': 0, 'lines_removed': 1}, + 52: {'lines_added': 1, 'lines_removed': 0}, + 54: {'lines_added': 0, 'lines_removed': 0}, + 55: {'lines_added': 0, 'lines_removed': 1}, + 57: {'lines_added': 8, 'lines_removed': 1}, + 60: {'lines_added': 0, 'lines_removed': 0}, + 62: {'lines_added': 0, 'lines_removed': 1}, + 64: {'lines_added': 0, 'lines_removed': 1}, + 66: {'lines_added': 2, 'lines_removed': 0}, + 67: {'lines_added': 0, 'lines_removed': 0}, + 70: {'lines_added': 0, 'lines_removed': 0}, + 71: {'lines_added': 0, 'lines_removed': 1}, + 72: {'lines_added': 2, 'lines_removed': 0}, + 74: {'lines_added': 10, 'lines_removed': 0}, + 79: {'lines_added': 0, 'lines_removed': 0}, + 80: {'lines_added': 0, 'lines_removed': 2}, + 81: {'lines_added': 1, 'lines_removed': 0}, + 83: 
{'lines_added': 0, 'lines_removed': 1}, + 85: {'lines_added': 0, 'lines_removed': 0}, + 87: {'lines_added': 0, 'lines_removed': 1}, + 89: {'lines_added': 0, 'lines_removed': 0}, + 99: {'lines_added': 2, 'lines_removed': 0}, + 102: {'lines_added': 0, 'lines_removed': 0}, + 103: {'lines_added': 7, 'lines_removed': 0}, + 105: {'lines_added': 2, 'lines_removed': 0}, + 108: {'lines_added': 1, 'lines_removed': 0}, + 111: {'lines_added': 3, 'lines_removed': 0}, + 142: {'lines_added': 0, 'lines_removed': 2}, + 148: {'lines_added': 3, 'lines_removed': 0}, + 167: {'lines_added': 0, 'lines_removed': 1}, + 172: {'lines_added': 0, 'lines_removed': 0}, + 173: {'lines_added': 0, 'lines_removed': 2}, + 189: {'lines_added': 0, 'lines_removed': 1}, + 191: {'lines_added': 1, 'lines_removed': 0}, + 192: {'lines_added': 2, 'lines_removed': 3}, + 193: {'lines_added': 0, 'lines_removed': 0}, + 196: {'lines_added': 2, 'lines_removed': 0}, + 197: {'lines_added': 0, 'lines_removed': 0}, + 198: {'lines_added': 25, 'lines_removed': 0}, + 200: {'lines_added': 1, 'lines_removed': 0}, + 217: {'lines_added': 14, 'lines_removed': 0}, + 223: {'lines_added': 0, 'lines_removed': 1}, + 224: {'lines_added': 1, 'lines_removed': 0}, }, } From ed53ed9f0bd00d5f4ca9fa1356519207bda2fc54 Mon Sep 17 00:00:00 2001 From: NewMountain Date: Fri, 5 Nov 2021 20:41:48 -0700 Subject: [PATCH 2/5] Added command line argument to optionally show file data --- gitcodechurn.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/gitcodechurn.py b/gitcodechurn.py index f69062c..7e09b02 100644 --- a/gitcodechurn.py +++ b/gitcodechurn.py @@ -67,6 +67,11 @@ def main(): default = '', help = 'the Git repository subdirectory to be excluded' ) + parser.add_argument( + "--show-file-data", + action="store_true", + help="Display line change information for the analyzed file(s)" + ) args = parser.parse_args() after = args.after From 6f4f6acb6ef3f7a51b6aacdd2cb22a229ca9e6e8 Mon Sep 17 00:00:00 2001 From: NewMountain Date: Fri, 5 Nov 
2021 20:45:26 -0700 Subject: [PATCH 3/5] Updated internal logic to support line remove and line add tracking * Refactored existing code to track line add and line removal * Made sure the code remained true to original churn and contributions counts --- gitcodechurn.py | 101 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 92 insertions(+), 9 deletions(-) diff --git a/gitcodechurn.py b/gitcodechurn.py index 7e09b02..38512b4 100644 --- a/gitcodechurn.py +++ b/gitcodechurn.py @@ -147,9 +147,9 @@ def get_loc(commit, dir, files, contribution, churn, exdir): new_loc_changes = is_loc_change(result, loc_changes) if loc_changes != new_loc_changes: loc_changes = new_loc_changes - locc = get_loc_change(loc_changes) - for loc in locc: - if loc in files[file]: + (removal, addition) = get_loc_change(loc_changes) + + files, contribution, churn = merge_operations(removal, addition, files, contribution, churn, file) files[file][loc] += locc[loc] churn += abs(locc[loc]) else: @@ -160,8 +160,89 @@ def get_loc(commit, dir, files, contribution, churn, exdir): return [files, contribution, churn] -def get_commit_results(command, dir): - return get_proc_out(command, dir).splitlines() +def merge_operations(removal, addition, files, contribution, churn, file): + # Ensure all required data is in place + ensure_file_exists(files, file) + + file_line_churn_dict = files[file] + + if is_noop(removal, addition): + # In the case of a noop, it's not counted in change metrics, but should + # be marked as changed to accurately include future churn metrics + # An example of this is a diff like: + # "diff --git README.md README.md", + # "index bedbc85..bb033cd 100644", + # "--- README.md", + # "+++ README.md", + # "@@ -8 +8 @@ Code churn has several definitions, the one that to me provides the most value a", + # "-*Reference: https://blog.gitprime.com/why-code-churn-matters/*", + # "+*Reference: https://www.pluralsight.com/blog/teams/why-code-churn-matters*", + # In this example, we 
deleted the line, and then added the line by updating the link + # This repo would consider this a "No-Op" as it nets to no change + # However, we want to mark line 8 as changed so that all subsequent + # changes to line 8 are marked as churn + # The thinking behind this is the other updates should have been made + # while this change was being made. + remove_line_number = removal[0] + ensure_line_exists(file_line_churn_dict, remove_line_number) + return files, contribution, churn + + for (line_number, lines_removed, lines_added) in compute_changes(removal, addition): + # Churn check performed before line modification changes + is_churn = is_this_churn(file_line_churn_dict, line_number) + + ensure_line_exists(file_line_churn_dict, line_number) + line_count_change_metrics = file_line_churn_dict[line_number] + + line_count_change_metrics["lines_removed"] += lines_removed + line_count_change_metrics["lines_added"] += lines_added + + if is_churn: + churn += abs(lines_removed) + abs(lines_added) + else: + contribution += abs(lines_removed) + abs(lines_added) + + return files, contribution, churn + + +def compute_changes(removal, addition): + # If both removal and addition affect the same line, net out the change + # Returns a list of tuples of type (line_number, lines_removed, lines_added) + removed_line_number, lines_removed = removal + added_line_number, lines_added = addition + + if removed_line_number == added_line_number: + if lines_added >= lines_removed: + return [(removed_line_number, 0, (lines_added - lines_removed))] + else: + return [(removed_line_number, (lines_removed - lines_added), 0)] + else: + return [ + (removed_line_number, lines_removed, 0), + (added_line_number, 0, lines_added), + ] + + +def is_this_churn(file_line_churn_dict, line_number): + # The definition of churn is any change to a line + # after the first time the line has been changed + # This is detected by a line operation logged in the file_line_churn_dict + return line_number in 
file_line_churn_dict + + +def ensure_line_exists(file_line_churn_dict, line_number): + if line_number not in file_line_churn_dict: + file_line_churn_dict[line_number] = {"lines_removed": 0, "lines_added": 0} + + +def ensure_file_exists(files, file): + if file not in files: + files[file] = {} + + +def is_noop(removal, addition): + # A noop event occurs when a change indicates one delete and one add on the same line + return removal == addition # arrives in a format such as -13 +27,5 (no commas mean 1 loc change) @@ -180,6 +261,8 @@ def get_loc_change(loc_changes): left = int(left[1:]) left_dec = 1 + removal = (left, left_dec) + # additions right = loc_changes[loc_changes.find(' ')+1:] right_dec = 0 @@ -191,10 +274,10 @@ def get_loc_change(loc_changes): right = int(right[1:]) right_dec = 1 - if left == right: - return {left: (right_dec - left_dec)} - else: - return {left : left_dec, right: right_dec} + addition = (right, right_dec) + + return (removal, addition) + def is_loc_change(result, loc_changes): # search for loc changes (@@ ) and update loc_changes variable From f720547a8c0f7fc454f09a1b78640d967a524cb7 Mon Sep 17 00:00:00 2001 From: NewMountain Date: Fri, 5 Nov 2021 20:45:53 -0700 Subject: [PATCH 4/5] Added pretty-print functionality for page statistics --- gitcodechurn.py | 52 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 5 deletions(-) diff --git a/gitcodechurn.py b/gitcodechurn.py index 38512b4..af6a3f6 100644 --- a/gitcodechurn.py +++ b/gitcodechurn.py @@ -104,6 +104,53 @@ def main(): # print files in case more granular results are needed #print('files: ', files) + if args.show_file_data: + display_file_metrics(files) + + +def display_file_metrics(files): + display_file_metrics_header() + for file_name, line_change_info in files.items(): + for line_number, line_diff_stats in line_change_info.items(): + display_file_metrics_row(file_name, line_number, line_diff_stats) + + +def display_file_metrics_header(): + print("-" 
* 79) + print( + "{file}|{line_number}|{lines_added}|{lines_removed}".format( + file=format_column("FILE NAME", 34), + line_number=format_column("LINE #", 10), + lines_added=format_column("ADDED", 10), + lines_removed=format_column("REMOVED", 10), + ) + ) + + +def display_file_metrics_row(file_name, line_number, line_diff_stats): + added = line_diff_stats.get("lines_added") + removed = line_diff_stats.get("lines_removed") + + if added == 0 and removed == 0: + return + print("-" * 79) + print( + "{file}|{ln}|{lines_added}|{lines_removed}".format( + file=format_column(file_name, 34), + ln=format_column(str(line_number), 10), + lines_added=format_column(str(added), 10), + lines_removed=format_column(str(removed), 10), + ) + ) + + +def format_column(text, width): + text_length = len(text) + total_pad = width - text_length + pad_left = total_pad // 2 + pad_right = total_pad - pad_left + return (" " * pad_left) + text + (" " * pad_right) + def calculate_statistics(commits, dir, exdir): # structured like this: files -> LOC @@ -150,11 +197,6 @@ def get_loc(commit, dir, files, contribution, churn, exdir): (removal, addition) = get_loc_change(loc_changes) files, contribution, churn = merge_operations(removal, addition, files, contribution, churn, file) - files[file][loc] += locc[loc] - churn += abs(locc[loc]) - else: - files[file][loc] = locc[loc] - contribution += abs(locc[loc]) else: continue return [files, contribution, churn] From 311cd570d8c58402d5c8a22bdedfbf6f721206c0 Mon Sep 17 00:00:00 2001 From: NewMountain Date: Fri, 5 Nov 2021 20:46:07 -0700 Subject: [PATCH 5/5] Provide example of updated API in README --- README.md | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/README.md b/README.md index 1c8256a..fee6526 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,7 @@ Result is a print with aggregated contribution and churn per author for a given - **-h, --h, --help**    show this help message and exit - 
**-exdir**                   exclude Git repository subdirectory +- **--show-file-data** display line count changes per file ## Usage Example 1 @@ -63,6 +64,73 @@ contribution: 4423 churn: -543 ``` +## Usage Example 3 + +```bash +python ./gitcodechurn.py after="2018-11-29" before="2021-11-05" author="flacle" dir="/Users/myname/myrepo" --show-file-data +``` + +## Output 3 + +```bash +author: flacle +contribution: 337 +churn: -19 +------------------------------------------------------------------------------- + FILE NAME | LINE # | ADDED | REMOVED +------------------------------------------------------------------------------- + gitcodechurn.py | 1 | 190 | 0 +------------------------------------------------------------------------------- + gitcodechurn.py | 2 | 4 | 0 +------------------------------------------------------------------------------- + gitcodechurn.py | 37 | 2 | 0 +------------------------------------------------------------------------------- + gitcodechurn.py | 40 | 0 | 1 +------------------------------------------------------------------------------- + gitcodechurn.py | 42 | 1 | 0 +------------------------------------------------------------------------------- + gitcodechurn.py | 45 | 0 | 1 +------------------------------------------------------------------------------- + gitcodechurn.py | 47 | 1 | 0 +------------------------------------------------------------------------------- + gitcodechurn.py | 50 | 0 | 1 +------------------------------------------------------------------------------- + gitcodechurn.py | 52 | 1 | 0 +------------------------------------------------------------------------------- + gitcodechurn.py | 55 | 0 | 1 +------------------------------------------------------------------------------- + gitcodechurn.py | 57 | 8 | 1 +------------------------------------------------------------------------------- + gitcodechurn.py | 66 | 2 | 0 +------------------------------------------------------------------------------- + gitcodechurn.py |
62 | 0 | 1 +... +------------------------------------------------------------------------------- + gitcodechurn.py | 200 | 1 | 0 +------------------------------------------------------------------------------- + README.md | 12 | 2 | 0 +------------------------------------------------------------------------------- + README.md | 16 | 0 | 1 +------------------------------------------------------------------------------- + README.md | 18 | 1 | 0 +------------------------------------------------------------------------------- + README.md | 21 | 11 | 0 +------------------------------------------------------------------------------- + README.md | 20 | 0 | 1 +------------------------------------------------------------------------------- + README.md | 33 | 1 | 0 +------------------------------------------------------------------------------- + README.md | 22 | 0 | 1 +------------------------------------------------------------------------------- + README.md | 35 | 1 | 0 +------------------------------------------------------------------------------- + README.md | 24 | 0 | 2 +------------------------------------------------------------------------------- + README.md | 37 | 3 | 0 +------------------------------------------------------------------------------- + README.md | 41 | 12 | 0 +``` + Outputs of Usage Example 1 can be used as part of a pipeline that generates bar charts for reports: ![contribution vs churn example chart](/chart.png)