From 909f7c67eb53b52162970fcc8fa9f9aeddfa0570 Mon Sep 17 00:00:00 2001
From: Ocre42
Date: Tue, 19 Mar 2019 11:51:58 +0000
Subject: [PATCH] Renaming variables and some linting

---
 statsrunner/gitaggregate-publisher.py | 43 ++++++++++-----------
 statsrunner/gitaggregate.py           | 55 +++++++++++++--------------
 2 files changed, 48 insertions(+), 50 deletions(-)

diff --git a/statsrunner/gitaggregate-publisher.py b/statsrunner/gitaggregate-publisher.py
index 5de00dc750a..e1ab2bce716 100644
--- a/statsrunner/gitaggregate-publisher.py
+++ b/statsrunner/gitaggregate-publisher.py
@@ -9,10 +9,10 @@
 
 GITOUT_DIR = os.environ.get('GITOUT_DIR') or 'gitout'
 
-# Only aggregate certain json stats files at publisher level 
-# These should be small stats files that will not consume large amounts of 
+# Only aggregate certain json stats files at publisher level
+# These should be small stats files that will not consume large amounts of
 # memory/disk space if aggregated over time
-whitelisted_stats_files = [ 
+whitelisted_stats_files = [
     'activities',
     'activity_files',
     'bottom_hierarchy',
@@ -29,12 +29,12 @@
     'latest_transaction_date',
     'transaction_dates_hash',
     'most_recent_transaction_date'
-    ]
+]
 
 # Set bool if the 'dated' argument has been used in calling this script
 dated = len(sys.argv) > 1 and sys.argv[1] == 'dated'
 
-# Load the reference of commits to dates 
+# Load the reference of commits to dates
 if dated:
     gitdates = json.load(open('gitdate.json'))
 
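
Note: when either script is run with the 'dated' argument, aggregates are
keyed by commit date rather than by commit hash, using the commit-to-date
mapping loaded from gitdate.json above. A minimal sketch of that keying,
assuming gitdate.json maps commit hashes to date strings (output_key is an
illustrative helper, not something this patch adds):

import json
import sys

dated = len(sys.argv) > 1 and sys.argv[1] == 'dated'
# Commit hash -> date mapping; only needed in dated mode
gitdates = json.load(open('gitdate.json')) if dated else {}

def output_key(commit):
    # Dated runs key aggregates by date; commits missing from gitdate.json
    # are skipped by the scripts. Default runs key by the hash itself.
    return gitdates.get(commit) if dated else commit
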
@@ -42,43 +42,42 @@
 # Variable commit will be the commit hash
 for commit in os.listdir(os.path.join(GITOUT_DIR, 'commits')):
     print "gitaggregate-publisher for commit {}".format(commit)
-    
+
     for publisher in os.listdir(os.path.join(GITOUT_DIR, 'commits', commit, 'aggregated-publisher')):
         print "{0} Currently looping over publisher {1}".format(str(datetime.datetime.now()), publisher)
-        
+
         # Set output directory for this publisher and attempt to make the directory. Pass if it already exists
-        git_out_dir = os.path.join(GITOUT_DIR,'gitaggregate-publisher-dated' if dated else 'gitaggregate-publisher', publisher)
+        git_out_dir = os.path.join(GITOUT_DIR, 'gitaggregate-publisher-dated' if dated else 'gitaggregate-publisher', publisher)
         try:
             os.makedirs(git_out_dir)
         except OSError:
             pass
-        
+
         # Set an output dictionary for this publisher
         total = defaultdict(dict)
-        
+
         if os.path.isdir(git_out_dir):
             # Loop over the existing files in the output directory for this publisher and load them into the 'total' dictionary
             for fname in os.listdir(git_out_dir):
                 if fname.endswith('.json'):
-                    with open(os.path.join(git_out_dir, fname)) as fp:
-                        total[fname[:-5]] = json.load(fp, parse_float=decimal.Decimal)
+                    with open(os.path.join(git_out_dir, fname)) as filepath:
+                        total[fname[:-5]] = json.load(filepath, parse_float=decimal.Decimal)
 
         # Loop over the whitelisted states files and add current values to the 'total' dictionary
         for statname in whitelisted_stats_files:
             path = os.path.join(GITOUT_DIR, 'commits', commit, 'aggregated-publisher', publisher, statname+'.json')
             if os.path.isfile(path):
-                with open(path) as fp:
-                    k = statname
-                    if not commit in total[k]:
-                        v = json.load(fp, parse_float=decimal.Decimal)
+                with open(path) as filepath:
+                    if commit not in total[statname]:
+                        statfile = json.load(filepath, parse_float=decimal.Decimal)
                         if dated:
                             if commit in gitdates:
-                                total[k][gitdates[commit]] = v
+                                total[statname][gitdates[commit]] = statfile
                         else:
-                            total[k][commit] = v
+                            total[statname][commit] = statfile
 
         # Write data from the 'total' dictionary to a temporary file, then rename
-        for k,v in total.items():
-            with open(os.path.join(git_out_dir, k+'.json.new'), 'w') as fp:
-                json.dump(v, fp, sort_keys=True, indent=2, default=decimal_default)
-            os.rename(os.path.join(git_out_dir, k+'.json.new'), os.path.join(git_out_dir, k+'.json'))
+        for statname, statfile in total.items():
+            with open(os.path.join(git_out_dir, statname + '.json.new'), 'w') as filepath:
+                json.dump(statfile, filepath, sort_keys=True, indent=2, default=decimal_default)
+            os.rename(os.path.join(git_out_dir, statname + '.json.new'), os.path.join(git_out_dir, statname+'.json'))
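
The publisher-level script above and the cross-publisher script below share
the same merge step: read any existing aggregate JSON with floats parsed as
decimal.Decimal (so values round-trip exactly), then add the current commit's
stats only if that commit is not already present. A minimal standalone sketch
of that step; merge_stat and its signature are illustrative, not part of the
codebase:

import decimal
import json
import os

def merge_stat(git_out_dir, statname, commit, stat_path):
    """Fold one commit's stats file into the aggregate for one stat."""
    agg_path = os.path.join(git_out_dir, statname + '.json')
    # Load the existing aggregate, or start fresh if none exists yet
    if os.path.isfile(agg_path):
        with open(agg_path) as handle:
            total = json.load(handle, parse_float=decimal.Decimal)
    else:
        total = {}
    # Re-runs are idempotent: a commit already present is left untouched
    if commit not in total:
        with open(stat_path) as handle:
            total[commit] = json.load(handle, parse_float=decimal.Decimal)
    return total
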
diff --git a/statsrunner/gitaggregate.py b/statsrunner/gitaggregate.py
index 97fabaaf465..f9f73508ee7 100644
--- a/statsrunner/gitaggregate.py
+++ b/statsrunner/gitaggregate.py
@@ -1,9 +1,8 @@
-from collections import defaultdict
-from common import decimal_default
 import decimal
 import json
-import os 
+import os
 import sys
+from common import decimal_default
 
 # Set value for the gitout directory
 GITOUT_DIR = os.environ.get('GITOUT_DIR') or 'gitout'
@@ -14,8 +13,8 @@
 git_out_dir = os.path.join(GITOUT_DIR, 'gitaggregate-dated' if dated else 'gitaggregate')
 
 # Exclude some json stats files from being aggregated
-# These are typically the largest stats files that would consume large amounts of
-# memory/disk space if aggregated over time
+# These are typically the largest stats files that would consume large amounts
+# of memory/disk space if aggregated over time
 whitelisted_stats_files = [
     'activities',
     'activity_files',
@@ -31,10 +30,10 @@
     'unique_identifiers',
     'validation',
     'versions',
-    'teststat' # Extra 'stat' added as the test_gitaggregate.py assumes a file with this name is present
-    ]
+    'teststat'  # Extra 'stat' added as the test_gitaggregate.py assumes a file with this name is present
+]
 
-# Load the reference of commits to dates 
+# Load the reference of commits to dates
 if dated:
     gitdates = json.load(open('gitdate.json'))
 
@@ -54,38 +53,38 @@ for fname in os.listdir(os.path.join(GITOUT_DIR, 'commits', commit, 'aggregated')):
         if not fname.endswith('.json'):
             continue
-        
-        k = fname[:-5] # remove '.json' from the filename
+
+        trimmed_name = fname[:-5]  # remove '.json' from the filename
 
         # Ignore certain files
-        if k not in whitelisted_stats_files:
-            continue
+        if trimmed_name not in whitelisted_stats_files:
+            continue
 
         print 'Adding to {} for file: {}'.format('gitaggregate-dated' if dated else 'gitaggregate', fname)
-        
+
         commit_json_fname = os.path.join(GITOUT_DIR, 'commits', commit, 'aggregated', fname)
-        
+
         # Load the current file conents to memory, or set as an empty dictionary
         if fname in git_out_files:
             # FIXME: This is a possible cause of a memory issue in future, as the size of the aggregate file
             # increases each time there is a new commit
-            with open(os.path.join(git_out_dir, fname)) as fp:
-                v = json.load(fp, parse_float=decimal.Decimal)
+            with open(os.path.join(git_out_dir, fname)) as filepath:
+                gitaggregate_json = json.load(filepath, parse_float=decimal.Decimal)
         else:
-            v = {}
-        
+            gitaggregate_json = {}
+
         # If the commit that we are looping over is not already in the data for this file, then add it to the output
-        if not commit in v:
-            with open(commit_json_fname) as fp2:
-                v2 = json.load(fp2, parse_float=decimal.Decimal)
+        if commit not in gitaggregate_json:
+            with open(commit_json_fname) as commit_filepath:
+                commit_gitaggregate_json = json.load(commit_filepath, parse_float=decimal.Decimal)
             if dated:
                 if commit in gitdates:
-                    v[gitdates[commit]] = v2
+                    gitaggregate_json[gitdates[commit]] = commit_gitaggregate_json
             else:
-                v[commit] = v2
+                gitaggregate_json[commit] = commit_gitaggregate_json
 
             # Write output to a temporary file, then rename
-            with open(os.path.join(git_out_dir, k+'.json.new'), 'w') as fp:
-                print 'Writing data to {}'.format(k)
-                json.dump(v, fp, sort_keys=True, indent=2, default=decimal_default)
-            print 'Renaming file {} to {}'.format(k+'.json.new', k+'.json')
-            os.rename(os.path.join(git_out_dir, k+'.json.new'), os.path.join(git_out_dir, k+'.json'))
+            with open(os.path.join(git_out_dir, trimmed_name + '.json.new'), 'w') as filepath:
+                print 'Writing data to {}'.format(trimmed_name)
+                json.dump(gitaggregate_json, filepath, sort_keys=True, indent=2, default=decimal_default)
+            print 'Renaming file {} to {}'.format(trimmed_name + '.json.new', trimmed_name + '.json')
+            os.rename(os.path.join(git_out_dir, trimmed_name + '.json.new'), os.path.join(git_out_dir, trimmed_name + '.json'))
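
Both scripts end with the same write pattern: dump the aggregate to a
<stat>.json.new file, then os.rename() it over <stat>.json. A minimal sketch
of why that order matters; write_aggregate is an illustrative wrapper, and
its default argument stands in for decimal_default, which the scripts import
from statsrunner/common.py:

import json
import os

def write_aggregate(git_out_dir, statname, data, default=str):
    # Writing to a temporary '.new' file first means a crash mid-dump leaves
    # the previous aggregate intact; on POSIX, os.rename() within the same
    # filesystem then swaps the file in as a single atomic step, so readers
    # never observe a half-written JSON file.
    tmp_path = os.path.join(git_out_dir, statname + '.json.new')
    with open(tmp_path, 'w') as handle:
        json.dump(data, handle, sort_keys=True, indent=2, default=default)
    os.rename(tmp_path, os.path.join(git_out_dir, statname + '.json'))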