In [1]:
import requests
import pandas as pd
from tqdm.notebook import tqdm
import json

In [2]:
# Load the GitHub API token from the local token.json file (stored under its
# 'token' key) so the credential is not hardcoded in the notebook.
with open('token.json') as fi:
    token_config = json.load(fi)

token = token_config['token']

# The requests below pass these headers so the token is sent as a Bearer credential.
headers = {'Authorization': f'Bearer {token}'}

In [3]:
org = 'nss-data-science-cohort-5'
project_name = 'hmda_shiny'

# List every repository in the organization, one page of 100 at a time, and
# keep the names that contain the project name.
URL = f'https://api.github.com/orgs/{org}/repos'
page = 1
repos = []

while True:
    params = {'per_page': 100, 'page': page}
    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(URL, params = params, headers = headers, timeout = 30)

    # Fail loudly on an HTTP error (e.g. bad token, rate limit, unknown org).
    # Without this check an error payload - a JSON object rather than a list -
    # would reach the comprehension below and raise a confusing TypeError.
    response.raise_for_status()

    # Parse the body once per page instead of once for the loop test and
    # once more for the comprehension.
    page_of_repos = response.json()

    # An empty page means we have walked past the last repository.
    if not page_of_repos:
        break

    repos.extend([x['name'] for x in page_of_repos if project_name in x['name']])
    page += 1

# Remove the original template repository.
repos = [x for x in repos if x != project_name]

In [4]:
def fetch_all_pages(url, params = None):
    """Return the items from every page of a paginated GitHub list endpoint.

    Parameters
    ----------
    url : str
        Endpoint to request first.
    params : dict, optional
        Query parameters for the first request only.

    Returns
    -------
    list
        The JSON items of all pages, concatenated in the order received.

    Raises
    ------
    requests.HTTPError
        If any page comes back with an error status.
    """
    items = []
    while url:
        response = requests.get(url, params = params, headers = headers, timeout = 30)
        response.raise_for_status()
        items.extend(response.json())

        # requests exposes the parsed Link response header as `response.links`;
        # when there is no rel="next" entry we have reached the last page.
        url = response.links.get('next', {}).get('url')
        # The "next" URL already carries the full query string.
        params = None
    return items


results = []

for repo_name in tqdm(repos):
    repo = f'{org}/{repo_name}'

    # First, retrieve all of the branches. Extract the shas from the results.
    # Paginating here means repositories with many branches are not cut off
    # at the API's default page size.
    URL = f'https://api.github.com/repos/{repo}/branches'
    shas = [x['commit']['sha'] for x in fetch_all_pages(URL, {'per_page': 100})]

    # Then, use those shas to retrieve the details.
    # Every page of history is read, so branches with more than 100 commits
    # are no longer truncated. Commits reachable from several branches show
    # up more than once in `results`.
    URL = f'https://api.github.com/repos/{repo}/commits'
    for sha in shas:
        params = {
            'sha': sha,
            'per_page': 100
        }

        for commit in fetch_all_pages(URL, params):
            results.append((repo_name,
                            commit['sha'],
                            commit['commit']['author']['date'],
                            commit['commit']['author']['name'],
                            commit['commit']['message']))

  0%|          | 0/7 [00:00<?, ?it/s]

In [6]:
colnames = ['repo_name', 'sha', 'date', 'committer', 'message']

# Turn the collected commit tuples into a table in a single chain:
#   1. drop rows that are exact duplicates,
#   2. drop rows whose committer is 'github-classroom[bot]',
#   3. parse the date strings into pandas datetimes.
results = (
    pd.DataFrame(results, columns = colnames)
    .drop_duplicates()
    .loc[lambda frame: frame['committer'] != 'github-classroom[bot]']
    .assign(date = lambda frame: pd.to_datetime(frame['date']))
)

The commits can be grouped by repository and committer to see how many commits each individual has contributed and when they last committed.

In [9]:
# For each (repository, committer) pair: number of commits and the timestamp
# of the most recent one.
(
    results
    .groupby(['repo_name', 'committer'])['date']
    .agg(['count', 'max'])
)

Unnamed: 0_level_0,Unnamed: 1_level_0,count,max
repo_name,committer,Unnamed: 2_level_1,Unnamed: 3_level_1
hmda_shiny-crazy-eights,Bryan Finlayson,8,2021-12-17 03:23:38+00:00
hmda_shiny-crazy-eights,MyMac,1,2021-12-14 23:57:27+00:00
hmda_shiny-crazy-eights,jrkotun,1,2021-12-17 02:41:19+00:00
hmda_shiny-crazy-eights,ross-k,6,2021-12-22 00:04:19+00:00
hmda_shiny-gum-drops,Jessica Ra,2,2021-12-17 02:10:45+00:00
hmda_shiny-gum-drops,jacob-parks98,10,2021-12-22 00:37:20+00:00
hmda_shiny-gum-drops,yvonnemartinez716,2,2021-12-18 19:44:03+00:00
hmda_shiny-loan-data,Evgeniya Kolesova,13,2021-12-21 23:23:22+00:00
hmda_shiny-loan-data,George Mills,33,2021-12-22 01:35:41+00:00
hmda_shiny-loan-data,Vahideh Rasekhi,9,2021-12-21 17:49:44+00:00
