In [None]:
import requests
import pandas as pd
from tqdm.notebook import tqdm
import json
import datetime

In [None]:
# Read the GitHub personal access token from a local JSON file so that it
# never has to be written into the notebook itself.
with open('token.json') as token_file:
    credentials = json.load(token_file)

token = credentials['token']

# Every API request below sends the token as a Bearer credential.
headers = {'Authorization': f'Bearer {token}'}

List the repositories that you want to check. The URLs are normalized in the following cell, so it doesn't matter whether or not they end in ".git".

In [None]:
repos = [
"https://github.com/bdfinlayson/presidency_ngram_viewer",
"https://github.com/czhang87/covid_vaccine.git",
"https://github.com/charre2021/NSS_Midcourse_Project_charre2021.git",
"https://github.com/Corbec/emergency-room-visits",
"https://github.com/ccmerry/injury_rate_chance.git",
"https://github.com/CReihsmann/Shiny_Triathlon.git",
"https://github.com/elavender1/Portfolio-Optimization.git",
"https://github.com/ZhenyaC/st_ownership",
"https://github.com/ripplesphere/pokemon_battle_simulator",
"https://github.com/jrkotun/global-video-game-sales-analysis",
"https://github.com/jacob-parks98/Smart_MAR",
"https://github.com/jcrando/hosptial-cost-",
"https://github.com/JAGVU09/Nashville_STR_effects",
"https://github.com/jrawdata/DSTnashville",
"https://github.com/jrioross/dynasty_fantasy_football_ktc",
"https://github.com/nedatahe/endangered-languages",
"https://github.com/rohitrvenkat/nashville-traffic",
"https://github.com/ross-k/ross-k-market_sector_tracker",
"https://github.com/VahidehRasekhi/twitter_sentiment_analysis",
"https://github.com/vredd418/Effectiveness-of-the-Metro-Nashville-Govt.-",
"https://github.com/yvonnemartinez716/orthopedicsurgerycosts.git"
]

In [None]:
def prepare_repo_name(repo_name):
    """Reduce a GitHub repository URL to its ``owner/name`` form.

    Parameters
    ----------
    repo_name : str
        A repository reference such as
        ``https://github.com/owner/name``, ``https://github.com/owner/name.git``,
        ``git@github.com:owner/name.git`` or an already-clean ``owner/name``.
        Surrounding whitespace and trailing slashes are tolerated.

    Returns
    -------
    str
        The ``owner/name`` part, with no host prefix, no trailing slash and
        no ``.git`` suffix. Input that is already clean is returned unchanged.
    """
    repo_name = repo_name.strip()

    # Drop the first matching host prefix only.
    for prefix in ('https://github.com/', 'http://github.com/', 'git@github.com:'):
        if repo_name.startswith(prefix):
            repo_name = repo_name[len(prefix):]
            break

    # A trailing slash would hide the ".git" suffix and would also end up
    # inside the API path built from this name, so remove it first.
    repo_name = repo_name.rstrip('/')

    if repo_name.endswith('.git'):
        repo_name = repo_name[:-len('.git')]

    return repo_name

# Normalize every entry to "owner/name"; this replaces the raw URL list.
repos = list(map(prepare_repo_name, repos))

In [None]:
# Seconds to wait on a single API call before giving up, so that a stalled
# connection cannot hang the notebook indefinitely.
REQUEST_TIMEOUT_SECONDS = 30


def _get_all_pages(url, params=None):
    """Fetch every page of a GitHub list endpoint and return the combined items.

    Parameters
    ----------
    url : str
        URL of the first page.
    params : dict, optional
        Query parameters for the first request only. The "next" links that
        GitHub returns already carry the full query string.

    Returns
    -------
    list
        The JSON items from all pages, in the order they were returned.

    Raises
    ------
    requests.HTTPError
        If any page comes back with an error status (unknown repo, bad
        token, rate limit, ...). Without this check the error payload, which
        is a JSON object rather than a list, would be iterated as if it were
        data.
    """
    items = []
    while url:
        response = requests.get(url,
                                params = params,
                                headers = headers,
                                timeout = REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()
        items.extend(response.json())

        # requests parses the Link header into response.links; there is no
        # 'next' entry on the last page, which ends the loop.
        url = response.links.get('next', {}).get('url')
        params = None

    return items


results = []

for repo_name in tqdm(repos):
    # First, retrieve all of the branches. Extract the shas from the results.
    branches = _get_all_pages(f'https://api.github.com/repos/{repo_name}/branches',
                              params = {'per_page': 100})
    shas = [branch['commit']['sha'] for branch in branches]

    # Then, use those shas to retrieve the details.
    # The token is sent only in the Authorization header (see `headers`); it is
    # deliberately not added to the query string, where it would leak into URLs.
    commits_url = f'https://api.github.com/repos/{repo_name}/commits'
    for sha in shas:
        commits = _get_all_pages(commits_url, params = {'sha': sha, 'per_page': 100})

        # Commits reachable from several branches are appended once per branch.
        for commit in commits:
            results.append((repo_name,
                            commit['sha'],
                            commit['commit']['author']['date'],
                            commit['commit']['author']['name'],
                            commit['commit']['message']))

This creates a DataFrame, `results`, which contains information on the commits retrieved above.

In [None]:
# Column order matches the tuples collected in the fetch loop.
# Note that 'committer' holds the commit author's name.
colnames = ['repo_name', 'sha', 'date', 'committer', 'message']

# The same commit can be collected more than once, so exact duplicate rows are dropped.
results = pd.DataFrame(results, columns = colnames).drop_duplicates()

# Parse the date strings into pandas datetimes.
results = results.assign(date = pd.to_datetime(results['date']))

The DataFrame `results_latest` shows the most recent commit and how long ago it was, along with a count of total commits.

In [None]:
# One row per repo: 'count' is the number of commit rows and 'max' is the
# timestamp of the most recent one.
results_latest = results.groupby(['repo_name'])['date'].agg(['count', 'max'])

# Compare timezone-aware UTC values on both sides. This avoids
# datetime.datetime.utcnow(), which is deprecated as of Python 3.12, and it
# converts the commit times to UTC instead of just dropping their timezone.
now_utc = pd.Timestamp.now(tz = 'UTC')
results_latest['time_since_last_commit'] = now_utc - pd.to_datetime(results_latest['max'], utc = True)

In [None]:
# Show the per-repo summary: commit count, latest commit time, and time since then.
results_latest

You can also easily filter for repos that have had no commits in, for example, the last 3 days.

In [None]:
# Maximum allowed time since the last commit, as a pandas timedelta string.
cutoff = '3d'

# Keep only the repos whose latest commit is older than the cutoff.
stale_mask = results_latest['time_since_last_commit'].gt(pd.to_timedelta(cutoff))
results_latest.loc[stale_mask]