In [1]:
# Install PyDriller, which this notebook imports below and which is not part of Anaconda.
# The %pip magic (rather than !pip) installs into the environment of the running kernel.
%pip install pydriller

Note: you may need to restart the kernel to use updated packages.


In [2]:
# Imports
from pydriller import Repository

# Repository to analyze, identified by its GitHub owner and project name
account = 'IntegerMan'
repository = 'OpenRA'

# Remote URL of that repository on GitHub
path = f'https://github.com/{account}/{repository}'

print(f'Using repository {path}')

Using repository https://github.com/IntegerMan/OpenRA


In [3]:
# Handle on the repository located at `path`
repo = Repository(path_to_repo=path)

# Lazy traversal of the repository's commits. This is a generator, so nothing
# is fetched until it is iterated, and it can only be iterated once.
repo_commits = repo.traverse_commits()

In [4]:
# Build one flat record per (commit, modified file) pair. Commit-level values
# are repeated on every file row of the same commit.
commits = []

# Start a fresh traversal here instead of consuming the shared `repo_commits`
# generator: a generator can only be iterated once, so re-running this cell
# would otherwise silently produce an empty list.
for commit in repo.traverse_commits():
    modified_files = commit.modified_files
    if not modified_files:
        # No file rows to emit, so skip before computing commit-level stats
        continue

    # These values are the same for every file in the commit. Several of them
    # appear to be derived by querying git on each access, so read them once
    # per commit rather than once per file.
    in_main = commit.in_main_branch
    num_inserts = commit.insertions
    num_deletes = commit.deletions
    num_files = commit.files
    branches = ', '.join(commit.branches)  # Comma separated list of branches the commit is found in
    parents = ', '.join(commit.parents)    # Comma separated list of parent hashes

    for f in modified_files:
        # Key order is significant: it becomes the column order of the data frame
        commits.append({
            'hash': commit.hash,
            'message': commit.msg,
            'author_name': commit.author.name,
            'author_date': commit.author_date,
            'in_main': in_main,
            'is_merge': commit.merge,
            'num_deletes': num_deletes,
            'num_inserts': num_inserts,
            'net_lines': num_inserts - num_deletes,
            'num_files': num_files,
            'branches': branches,
            'filename': f.filename,
            'old_path': f.old_path,
            'new_path': f.new_path,
            'project_name': commit.project_name,
            'project_path': commit.project_path,
            'parents': parents,
        })

# Intentionally not exported: author e-mail and timezone, and all committer fields

In [5]:

import pandas as pd

# Load the per-file commit records into a Pandas data frame (one row per record)
df_commits = pd.DataFrame(data=commits)

# Preview the first five rows
df_commits.head(5)

Unnamed: 0,hash,message,author_name,author_date,in_main,is_merge,num_deletes,num_inserts,net_lines,num_files,branches,filename,old_path,new_path,project_name,project_path,parents
0,b59ba43934a3a6837410db51cf60157cf854e52d,openra first commit!\n\ngit-svn-id: svn://svn....,chrisf,2007-06-19 08:51:17+00:00,True,False,0,1350,1350,11,bleed,Blowfish.cs,,MixBrowser\Blowfish.cs,OpenRA,C:\Users\MattE\AppData\Local\Temp\tmpsnu5cl6m\...,
1,b59ba43934a3a6837410db51cf60157cf854e52d,openra first commit!\n\ngit-svn-id: svn://svn....,chrisf,2007-06-19 08:51:17+00:00,True,False,0,1350,1350,11,bleed,MM.DAT,MixBrowser\MM.DAT,MixBrowser\MM.DAT,OpenRA,C:\Users\MattE\AppData\Local\Temp\tmpsnu5cl6m\...,
2,b59ba43934a3a6837410db51cf60157cf854e52d,openra first commit!\n\ngit-svn-id: svn://svn....,chrisf,2007-06-19 08:51:17+00:00,True,False,0,1350,1350,11,bleed,MixBrowser.csproj,,MixBrowser\MixBrowser.csproj,OpenRA,C:\Users\MattE\AppData\Local\Temp\tmpsnu5cl6m\...,
3,b59ba43934a3a6837410db51cf60157cf854e52d,openra first commit!\n\ngit-svn-id: svn://svn....,chrisf,2007-06-19 08:51:17+00:00,True,False,0,1350,1350,11,bleed,MixEntry.cs,,MixBrowser\MixEntry.cs,OpenRA,C:\Users\MattE\AppData\Local\Temp\tmpsnu5cl6m\...,
4,b59ba43934a3a6837410db51cf60157cf854e52d,openra first commit!\n\ngit-svn-id: svn://svn....,chrisf,2007-06-19 08:51:17+00:00,True,False,0,1350,1350,11,bleed,Program.cs,,MixBrowser\Program.cs,OpenRA,C:\Users\MattE\AppData\Local\Temp\tmpsnu5cl6m\...,


In [6]:
# Export the data frame to '<repository>_FileCommits.csv' in the working directory.
# The row index is written too (pandas default), as an unnamed first column.
df_commits.to_csv(f'{repository}_FileCommits.csv')