In [None]:
from nwb_project_analytics.gitstats import NWBGitInfo, GitRepos, GitRepo
from nwb_project_analytics.renderstats import RenderCommitStats

In [None]:
import os
from github import Github, Label
import pandas as pd
from tqdm.notebook import tqdm
from datetime import datetime
from collections import defaultdict
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline

# Configure inputs

In [None]:
# Directory in which results are saved
output_dir = os.path.join(os.getcwd(), 'plots/')
# Core developer usernames; issues raised by them are excluded from response-time analysis
DEV_USERNAMES = NWBGitInfo.CORE_DEVELOPERS
# Repositories to analyze.
#   - NWBGitInfo.CORE_API_REPOS: only the main NWB API repos
#   - NWBGitInfo.GIT_REPOS: all main NWB 2 repos
# Here we pick an explicit subset of NWBGitInfo.GIT_REPOS by key.
REPOS = GitRepos(
    [
        (repo_key, NWBGitInfo.GIT_REPOS[repo_key])
        for repo_key in [
            "PyNWB",
            "HDMF",
            "MatNWB",
            "NWB_Schema",
            "NWBInspector",
            "NWBWidgets",
            "HDMF_Zarr",
            "HDMF_Common_Schema",
            "HDMF_DocUtils",
        ]
    ]
)
# Filter out everything older than START, e.g., datetime(2021, 5, 1).
# Alternative value: NWBGitInfo.NWB1_DEPRECATION_DATE
START = datetime(2022, 1, 1)

We need an API key from GitHub to access the API. See https://docs.github.com/en/github/authenticating-to-github/keeping-your-account-and-data-secure/creating-a-personal-access-token for how to generate an access token.

**WARNING:** Never check in a token to the repo.
**WARNING:** Only use tokens with read-only access (never write access) to avoid accidental changes to the repo.

In [None]:
# Read the GitHub access token from a local file (never check this file in).
# strip() removes all surrounding whitespace, so a file saved with Windows
# line endings (\r\n) or trailing spaces does not leave stray characters in
# the token, which would make authentication fail.
with open('ghk.txt', 'r') as f:
    API_KEY = f.read().strip()
g = Github(API_KEY)

# Compile commit statistics as dataframe

In [None]:
# Map each selected repo object to a DataFrame of its commits since START
commits_dfs = {
    git_repo: git_repo.get_commits_as_dataframe(
        since=START,
        github_obj=g,
        tqdm=tqdm,
    )
    for git_repo in tqdm(REPOS.values(), position=0, desc='repos')
}

In [None]:
# Selects which RenderCommitStats plot is drawn for each repo:
# True -> plot_commit_cumulative_additions_and_deletions,
# False -> plot_commit_additions_and_deletions
cumulative = False
for repo, cdf in commits_dfs.items():
    if not cumulative:
        fig, ax = RenderCommitStats.plot_commit_additions_and_deletions(
            commits=cdf,
            repo_name=repo.repo,
            xaxis_dates=False,
            bar_width=0.9,
            color_additions=RenderCommitStats.COLOR_ADDITIONS,
            color_deletions=RenderCommitStats.COLOR_DELETIONS,
            xticks_rotate=90,
        )
    else:
        fig, ax = RenderCommitStats.plot_commit_cumulative_additions_and_deletions(
            commits=cdf, repo_name=repo.repo
        )
    # Render one figure per repository
    plt.show()

In [None]:
import matplotlib as mpl

def plot_commit_additions_and_deletions_summary(
    commits: dict,
    bar_width: float = 0.8,
    color_additions=RenderCommitStats.COLOR_ADDITIONS,
    color_deletions=RenderCommitStats.COLOR_DELETIONS,
    xticks_rotate: int = 45
):
    """
    Plot bar chart with total additions and deletions for a collection of repositories

    Additions are drawn as positive bars and deletions as negative bars, one pair
    per repository, in the iteration order of ``commits``.

    :param commits: Dict where the keys are the nwb_project_analytics.gitstats.GitRepo objects
                    (or the string name of the repo) and the values are pandas DataFrames with
                    the commits generated via GitRepo.get_commits_as_dataframe. Each DataFrame
                    must provide the columns 'additions' and 'deletions'.
    :param bar_width: Width of the bars
    :param color_additions: Color to be used for additions
    :param color_deletions: Color to be used for deletions
    :param xticks_rotate: Degrees to rotate x axis labels

    :returns: Tuple of the matplotlib figure and axes used for plotting
    """
    repos = [repo.repo if isinstance(repo, GitRepo) else repo for repo in commits.keys()]
    # Aggregate from the `commits` argument (not a notebook global) so that the
    # bars always match the repositories that were passed in.
    additions = np.array([np.sum(cdf['additions']) for cdf in commits.values()])
    deletions = np.array([np.sum(cdf['deletions']) for cdf in commits.values()])

    fig, ax = plt.subplots(figsize=(12, 6))
    x = list(range(len(repos)))
    ax.bar(
        x,
        additions,
        label='additions (total=%i)' % np.sum(additions),
        width=bar_width,
        color=color_additions
    )
    # Deletions are negated so that they are drawn below the x axis
    ax.bar(
        x,
        -1 * deletions,
        label='deletions (total=%i)' % (-1 * np.sum(deletions)),
        width=bar_width,
        color=color_deletions
    )
    ax.set_xticks(x)
    ax.set_xticklabels(repos, rotation=xticks_rotate)
    ax.set_title("Lines of code changed per repository")
    ax.set_ylabel("Lines of code")
    ax.set_xlabel("Repository")
    ax.legend()
    return fig, ax
              

# Summary figure: total additions/deletions across all repos in commits_dfs
fig, ax = plot_commit_additions_and_deletions_summary(
    commits=commits_dfs,
    bar_width=0.8,
    color_additions=RenderCommitStats.COLOR_ADDITIONS,
    color_deletions=RenderCommitStats.COLOR_DELETIONS,
    xticks_rotate=45,
)
plt.show()