Skip to content

Commit

Permalink
Merge pull request #21 from IQSS/16-contributors
Browse files Browse the repository at this point in the history
add github contributors #16
  • Loading branch information
kcondon committed Jun 4, 2019
2 parents 6f08ea2 + d6f0728 commit 4897f2c
Show file tree
Hide file tree
Showing 9 changed files with 128 additions and 6 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
config.json
cache
*.pyc
*.tsv
contributors.json
5 changes: 5 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ If you would like to run your code through jshint ([2.6.3][] for now), this comm

Running your Python through [flake8][] is appreciated. Some day it would be nice to use [Black][].

We have the following principles when writing Python:

- Support both Python 2 (for now) and Python 3.
- Use the batteries included in the standard library rather than relying on any dependencies. (This makes writing code compatible with both Python 2 and Python 3 more difficult. https://python-future.org/compatible_idioms.html is a helpful resource.)

Please run tests with both `python test.py` (Python 2) and `python3 test.py` (Python 3). Both versions of Python are tested at https://travis-ci.org/IQSS/dataverse-metrics

[tidy]: http://www.html-tidy.org
Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,11 @@ Copy `config.json.sample` to `config.json` and edit the following values:
- `endpoints`: An array of Metrics API endpoints to process. Note that the two types are `single` (e.g. `datasets/bySubject`) and `monthly` (e.g. `downloads/toMonth`). (You will notice a third type called `monthly_itemized` in `config.json.sample` but it is not yet supported.)
- `blacklists`: Arrays of terms to blacklist. Only the `datasets/bySubject` endpoint can have a blacklist.
- `colors`: A single color for bar charts and a palette of colors for tree maps.
- `github_repos`: An array of GitHub repos such as `https://github.com/IQSS/dataverse`. A line will be added per repo about the number of contributors.

### Aggregating metrics

Now that your `config.json` file is ready, run the `metrics.py` script to create a TSV file for each of the `endpoints`, which will be placed in the `aggregate_output_dir` directory:
Now that your `config.json` file is ready, run the `metrics.py` script to create a TSV file for each of the `endpoints` and a `contributors.json` file for the `github_repos`, all of which will be placed in the `aggregate_output_dir` directory:

python3 metrics.py

Expand Down
38 changes: 38 additions & 0 deletions aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ def main():
process_monthly_endpoints(monthly_endpoints, api_response_cache_dir, aggregate_output_dir)
process_single_endpoints(single_endpoints, api_response_cache_dir, aggregate_output_dir)
process_monthly_itemized_endpoints(monthly_itemized_endpoints, api_response_cache_dir, aggregate_output_dir)
process_github_contributors(api_response_cache_dir, aggregate_output_dir)


def process_monthly_endpoints(monthly_endpoints, api_response_cache_dir, aggregate_output_dir):
for endpoint in monthly_endpoints:
Expand Down Expand Up @@ -105,5 +107,41 @@ def process_monthly_itemized_endpoints(monthly_itemized_endpoints, api_response_
writer.writerow([month, name, totals[name]])


def process_github_contributors(api_response_cache_dir, aggregate_output_dir):
    """Aggregate cached GitHub contributor data into ``contributors.json``.

    Reads every
    ``<api_response_cache_dir>/contributors/github.com/<owner>/<repo>/contributors.json``
    file and writes a JSON list with one entry per repo::

        [{"url": "https://github.com/<owner>/<repo>",
          "contributors": [{"username": ..., "url": ..., "avatar": ..., "id": ...}]}]

    Repos without a cached ``contributors.json`` are skipped. If nothing has
    been cached at all (the directory is missing), an empty list is written.

    Args:
        api_response_cache_dir: Root directory of the cached API responses.
        aggregate_output_dir: Currently unused by this function.
    """
    # NOTE(review): the README says contributors.json is placed in
    # aggregate_output_dir, but the file is written relative to the current
    # working directory. Left unchanged here because contributors.js fetches
    # "contributors.json" by relative path -- confirm the intended location.
    github_dir = os.path.join(api_response_cache_dir, 'contributors', 'github.com')
    contributors_by_repo = []

    # The cache directory only exists once at least one repo was downloaded,
    # so a missing directory simply means "no repos".
    owners = sorted(os.listdir(github_dir)) if os.path.isdir(github_dir) else []
    for owner in owners:
        owner_dir = os.path.join(github_dir, owner)
        if not os.path.isdir(owner_dir):
            # Ignore stray files sitting next to the owner directories.
            continue
        # Sorted so the generated file is stable from run to run.
        for repo in sorted(os.listdir(owner_dir)):
            path_and_json_file = os.path.join(owner_dir, repo, 'contributors.json')
            # Explicit check instead of catching FileNotFoundError, which
            # does not exist on Python 2.
            if not os.path.isfile(path_and_json_file):
                continue
            with open(path_and_json_file) as f:
                json_data = json.load(f)
            authors = [contributor['author'] for contributor in json_data]
            contributors = [
                {
                    "username": author['login'],
                    "url": author['html_url'],
                    "avatar": author['avatar_url'],
                    "id": author['id'],
                }
                for author in authors
            ]
            contributors_by_repo.append({
                'url': 'https://github.com/' + owner + '/' + repo,
                'contributors': contributors,
            })

    contributors_filename = 'contributors.json'
    with open(contributors_filename, 'w') as f:
        json.dump(contributors_by_repo, f, indent=4, ensure_ascii=True)


# Run the aggregation only when this file is executed as a script.
if __name__ == '__main__':
    main()
8 changes: 4 additions & 4 deletions all-dataverse-installations.json
Original file line number Diff line number Diff line change
Expand Up @@ -444,15 +444,15 @@
},
{
"id": 1789,
"name": "Repositorio de datos de invesigacion",
"name": "Repositorio de Datos de Investigación Universidad del Rosario",
"full_name": "Universidad del Rosario",
"is_active": true,
"description": "",
"description": "Explore research data from Universidad del Rosario affiliated researchers.",
"lat": 4.600598,
"lng": -74.073352,
"logo": "https://dvn-h-prod.hz.lib.harvard.edu/media/logos/dir-46x46.png",
"logo": "https://dvn-h-prod.hz.lib.harvard.edu/media/logos/ur-46x46_ohTqcfg.png",
"url": "http://research-data.urosario.edu.co/",
"slug": "repositorio-de-datos-de-invesigacion",
"slug": "repositorio-de-datos-de-investigacion-universidad-del-rosario",
"version": "4.9.4"
},
{
Expand Down
5 changes: 4 additions & 1 deletion config.json.sample
Original file line number Diff line number Diff line change
Expand Up @@ -52,5 +52,8 @@
],
"files/toMonth": "#006699",
"downloads/toMonth": "#B94617"
}
},
"github_repos": [
"https://github.com/IQSS/dataverse"
]
}
42 changes: 42 additions & 0 deletions contributors.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// Once the DOM is ready, fetch contributors.json and render its contents.
$(document).ready(function() {
    var onContributorsLoaded = function(responseText) {
        showContributors(JSON.parse(responseText));
    };
    loadJSON(onContributorsLoaded, "contributors.json");
});

/**
 * Render the per-repo contributor summary into the element with id
 * "contributors".
 *
 * @param {Array} contributors Parsed contents of contributors.json.
 */
function showContributors(contributors) {
    var target = document.getElementById("contributors");
    target.innerHTML = getContributorsPerRepo(contributors);
}

/**
 * Build one HTML paragraph per repo stating how many people have contributed
 * to it, linking to the repo's GitHub contributors graph.
 *
 * @param {Array} contributors List of objects with a `url` string and a
 *     `contributors` array (only its length is used).
 * @returns {string} Concatenated `<p>` elements; "" for an empty list.
 */
function getContributorsPerRepo(contributors) {
    var paragraphs = [];
    for (var i = 0; i < contributors.length; ++i) {
        var url = contributors[i].url;
        var contribUrl = url + "/graphs/contributors";
        var numContributors = contributors[i].contributors.length;
        // Singular wording only for exactly one contributor, so that zero
        // reads "0 people have contributed to".
        var personOrPeople = (numContributors === 1) ?
            'person has contributed to' :
            'people have contributed to';
        paragraphs.push("<p>" + numContributors + " " + personOrPeople + " " +
            "<a href=\"" + contribUrl + "\" target=\"_blank\">" + url + "</a>.</p>");
    }
    return paragraphs.join("");
}

// https://codepen.io/KryptoniteDove/post/load-json-file-locally-using-pure-javascript
/**
 * Asynchronously GET a JSON file and hand its raw text to `callback`.
 *
 * The callback is invoked only when the request completes with HTTP 200;
 * on any other outcome nothing is called.
 *
 * @param {function(string)} callback Receives the response text (not parsed).
 * @param {string} jsonFile URL or relative path of the file to fetch.
 */
function loadJSON(callback, jsonFile) {
    var xobj = new XMLHttpRequest();
    xobj.overrideMimeType("application/json");
    xobj.open('GET', jsonFile, true);
    xobj.onreadystatechange = function() {
        // readyState 4 means the request is done; status is a number, so
        // compare strictly against 200 rather than the string "200".
        if (xobj.readyState === 4 && xobj.status === 200) {
            // The request is asynchronous, so the result can only be
            // delivered through the callback, not a return value.
            callback(xobj.responseText);
        }
    };
    xobj.send(null);
}
28 changes: 28 additions & 0 deletions download.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,18 @@ def main():
monthly_endpoints = config['endpoints']['monthly']
single_endpoints = config['endpoints']['single']
monthly_itemized_endpoints = config['endpoints']['monthly_itemized']
github_repos = config.get('github_repos')

for installation in installations:
process_monthly_endpoints(installation, monthly_endpoints, api_response_cache_dir, num_months_to_process)
# "monthly itemized" metrics are downloaded the same way as regular monthly metrics:
process_monthly_endpoints(installation, monthly_itemized_endpoints, api_response_cache_dir, num_months_to_process)
process_single_endpoints(installation, single_endpoints, api_response_cache_dir)

if github_repos:
for repo in github_repos:
process_github_repo(repo, api_response_cache_dir)


def process_monthly_endpoints(installation, monthly_endpoints, api_response_cache_dir, num_months_to_process):
for endpoint in monthly_endpoints:
Expand Down Expand Up @@ -97,6 +102,29 @@ def process_single_endpoint(installation, endpoint, api_response_cache_dir):
with open(path + '/' + filename, 'w') as outfile:
json.dump(json_out, outfile, indent=4)

def process_github_repo(repo, api_response_cache_dir):
    """Download GitHub contributor statistics for one repo into the cache.

    Fetches ``https://api.github.com/repos/<owner>/<repo>/stats/contributors``
    and stores the JSON as
    ``<api_response_cache_dir>/contributors/github.com/<owner>/<repo>/contributors.json``.
    Failures are reported on stderr and the repo is skipped; nothing is raised.

    Args:
        repo: Repo URL such as ``https://github.com/IQSS/dataverse``.
        api_response_cache_dir: Root directory of the cached API responses.
    """
    # The URL path is expected to look like '/<owner>/<repo>'.
    segments = [segment for segment in urlparse(repo).path.split('/') if segment]
    if len(segments) < 2:
        sys.stderr.write('Unable to determine GitHub owner and repo from ' + repo + '\n')
        return
    owner, repo_name = segments[0], segments[1]
    url = 'https://api.github.com/repos/' + owner + '/' + repo_name + '/stats/contributors'
    # NOTE(review): the stats endpoint may answer without data while GitHub is
    # still computing statistics -- confirm how get_remote_json handles that.
    try:
        # A single try covers both the request and the JSON parsing; either
        # failure is reported the same way on Python 2 and Python 3.
        response = urlrequest.urlopen(url)
        json_out = get_remote_json(response)
    except Exception:
        sys.stderr.write('Unable to retrieve JSON from ' + url + '\n')
        return
    path = api_response_cache_dir + '/' + "contributors" + '/' + "github.com" + '/' + owner + '/' + repo_name
    if not os.path.exists(path):
        os.makedirs(path)
    filename = 'contributors.json'
    with open(path + '/' + filename, 'w') as outfile:
        json.dump(json_out, outfile, indent=4)


# Run the download only when this file is executed as a script.
if __name__ == '__main__':
    main()
3 changes: 3 additions & 0 deletions index.html
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
</script>
<script src="plots.js">
</script>
<script src="contributors.js">
</script>
<link href="styles.css" media="screen" rel="stylesheet" type="text/css">
</head>
<body>
Expand Down Expand Up @@ -50,6 +52,7 @@
<p class="help-block small">Data retrieved via <a href="http://guides.dataverse.org/en/latest/api" target="_blank">API</a> from the following Dataverse installations:</p>
<div id="installations" class="help-block small"></div>
<p id="discrepancies" class="help-block small">Metrics are aggregated from multiple Dataverse installations running different versions (4.9 and newer), with different caching schedules, and with some metrics endpoints enabled and others disabled. Minor discrepancies in these metrics can be expected.</p>
<div id="contributors" class="help-block small"></div>
<p class="help-block small">Feedback is welcome via <a href="https://github.com/IQSS/dataverse-metrics" target="_blank">https://github.com/IQSS/dataverse-metrics</a> or <a href="https://dataverse.org/contact" target="_blank">any other channel</a>.</p>
</div>
</div>
Expand Down

0 comments on commit 4897f2c

Please sign in to comment.