In [None]:
# Install libraries not present in Anaconda
%pip install pydriller

In [None]:
# Imports and repository configuration
from pydriller import Repository

import pandas as pd
import plotly.express as px

# Repository to analyze: the GitHub account (owner) and the repository name
account = 'IntegerMan'
repository = 'OpenRA'

# GitHub URL assembled from the two settings above
path = f'https://github.com/{account}/{repository}'

# Echo the target so the cell output records which repository is being used
print(f'Using repository {path}')

In [None]:
# Point PyDriller at the repository identified by `path`
repo = Repository(path)

# One flat dict per commit, collected in a plain list so the data frame can be
# built in a single step afterwards. Re-created on every run of this cell, so
# re-running it does not duplicate rows.
commits = []

for commit in repo.traverse_commits():
    record = {
        # Identity and message
        'hash': commit.hash,
        'message': commit.msg,
        # Author details
        'author_name': commit.author.name,
        'author_email': commit.author.email,
        'author_date': commit.author_date,
        'author_tz': commit.author_timezone,
        # Committer details
        'committer_name': commit.committer.name,
        'committer_email': commit.committer.email,
        'committer_date': commit.committer_date,
        'committer_tz': commit.committer_timezone,
        # Position in the history
        'in_main': commit.in_main_branch,
        'is_merge': commit.merge,
        # Size of the change
        'num_deletes': commit.deletions,
        'num_inserts': commit.insertions,
        'lines': commit.lines,
        'net_lines': commit.insertions - commit.deletions,
        'num_files': commit.files,
        # Comma separated list of branches the commit is found in. Sorted because
        # PyDriller documents `branches` as a set, whose iteration order is not
        # stable; sorting keeps the exported text identical from run to run.
        'branches': ', '.join(sorted(commit.branches)),
        # Comma separated list of parents
        'parents': ', '.join(commit.parents),
        # PyDriller Open Source Delta Maintainability Model (OS-DMM) stat. See https://pydriller.readthedocs.io/en/latest/deltamaintainability.html for metric definitions
        'dmm_unit_size': commit.dmm_unit_size,
        'dmm_unit_complexity': commit.dmm_unit_complexity,
        'dmm_unit_interfacing': commit.dmm_unit_interfacing,
    }
    # Omitted: modified_files (list; not flattened into a column here), project_path, project_name
    commits.append(record)

In [None]:

# Build a Pandas data frame from the collected commit records, save it as CSV
# for later analysis, and show the first rows as the cell output
df_commits = pd.DataFrame(data=commits)
df_commits.to_csv(path_or_buf='RawCommits.csv')
df_commits.head(n=5)