In [7]:
# Install libraries not present in Anaconda.
# PyDriller supplies the Repository class this notebook imports to walk a Git history.
# NOTE(review): the version is unpinned — consider pinning it so re-runs are reproducible.
%pip install pydriller

Note: you may need to restart the kernel to use updated packages.


In [8]:
# Imports
from pydriller import Repository

# GitHub coordinates of the repository to analyze.
# Change `repository` (e.g. to 'VisualizingCode') to point the notebook at another repo.
account = 'IntegerMan'
repository = 'OpenRA'

# Remote URL handed to PyDriller: https://github.com/<account>/<repository>
path = 'https://github.com/' + '/'.join((account, repository))

print('Using repository ' + path)

Using repository https://github.com/IntegerMan/OpenRA


In [9]:
# Open the repository at `path` and flatten every commit into a plain dict.
# The resulting list of records is what later gets loaded into a DataFrame.
repo = Repository(path)

commits = []

for commit in repo.traverse_commits():
    # `filename` of every file touched by this commit
    files = [modified.filename for modified in commit.modified_files]

    record = {
        'hash': commit.hash,
        'message': commit.msg,
        'author_name': commit.author.name,
        'author_email': commit.author.email,
        'author_date': commit.author_date,
        'author_tz': commit.author_timezone,
        'committer_name': commit.committer.name,
        'committer_email': commit.committer.email,
        'committer_date': commit.committer_date,
        'committer_tz': commit.committer_timezone,
        'in_main': commit.in_main_branch,
        'is_merge': commit.merge,
        'num_deletes': commit.deletions,
        'num_inserts': commit.insertions,
        'lines': commit.lines,
        'net_lines': commit.insertions - commit.deletions,
        'num_files': commit.files,
        # Comma separated list of branches the commit is found in.
        # Sorted so the column value does not depend on the iteration order of
        # the collection PyDriller returns (documented as a set), keeping
        # re-runs of the export reproducible.
        'branches': ', '.join(sorted(commit.branches)),
        'files': ', '.join(files), # Comma separated list of files the commit modifies
        'parents': ', '.join(commit.parents), # Comma separated list of parents (original order kept)
        # PyDriller Open Source Delta Maintainability Model (OS-DMM) stat. See https://pydriller.readthedocs.io/en/latest/deltamaintainability.html for metric definitions
        'dmm_unit_size': commit.dmm_unit_size,
        'dmm_unit_complexity': commit.dmm_unit_complexity,
        'dmm_unit_interfacing': commit.dmm_unit_interfacing,
    }
    # Omitted: modified_files (list), project_path, project_name
    commits.append(record)

# Display how many commits were collected
len(commits)

28466

In [11]:

import pandas as pd

# Load the collected commit records into a Pandas data frame and persist them
# as "<repository>_Commits.csv" so the analysis can continue outside this notebook.
df_commits = pd.DataFrame(commits)

csv_path = repository + '_Commits.csv'
df_commits.to_csv(csv_path)

# Preview the first few rows as the cell output
df_commits.head()

Unnamed: 0,hash,message,author_name,author_email,author_date,author_tz,committer_name,committer_email,committer_date,committer_tz,...,num_inserts,lines,net_lines,num_files,branches,files,parents,dmm_unit_size,dmm_unit_complexity,dmm_unit_interfacing
0,b59ba43934a3a6837410db51cf60157cf854e52d,openra first commit!\n\ngit-svn-id: svn://svn....,chrisf,chrisf@993157c7-ee19-0410-b2c4-bb4e9862e678,2007-06-19 08:51:17+00:00,0,chrisf,chrisf@993157c7-ee19-0410-b2c4-bb4e9862e678,2007-06-19 08:51:17+00:00,0,...,1350,1350,1350,11,bleed,"Blowfish.cs, MM.DAT, MixBrowser.csproj, MixEnt...",,0.492647,0.794118,0.586397
1,711a99a02215319659a790804e70ed34277346a5,ITS WORKING!!! --some noob\n\ngit-svn-id: svn:...,chrisf,chrisf@993157c7-ee19-0410-b2c4-bb4e9862e678,2007-06-19 10:24:14+00:00,0,chrisf,chrisf@993157c7-ee19-0410-b2c4-bb4e9862e678,2007-06-19 10:24:14+00:00,0,...,152,212,92,4,bleed,"MixBrowser.csproj, MixEntry.cs, MixFile.cs, Pr...",b59ba43934a3a6837410db51cf60157cf854e52d,0.375,1.0,1.0
2,3fdefe451aa8cba9b4057b07dd8ffc8fa2d90f85,git-svn-id: svn://svn.ijw.co.nz/svn/OpenRa@105...,chrisf,chrisf@993157c7-ee19-0410-b2c4-bb4e9862e678,2007-06-19 10:30:35+00:00,0,chrisf,chrisf@993157c7-ee19-0410-b2c4-bb4e9862e678,2007-06-19 10:30:35+00:00,0,...,18,20,16,2,bleed,"MixFile.cs, Program.cs",711a99a02215319659a790804e70ed34277346a5,0.0,1.0,1.0
3,765c0ac0673c10471f5b7b46a008eb78ffa143b2,git-svn-id: svn://svn.ijw.co.nz/svn/OpenRa@105...,chrisf,chrisf@993157c7-ee19-0410-b2c4-bb4e9862e678,2007-06-19 11:31:31+00:00,0,chrisf,chrisf@993157c7-ee19-0410-b2c4-bb4e9862e678,2007-06-19 11:31:31+00:00,0,...,3,16,-10,2,bleed,"MixFile.cs, Program.cs",3fdefe451aa8cba9b4057b07dd8ffc8fa2d90f85,1.0,0.0,0.0
4,3b1a1e3938a4b79815ce0f2bff07bd3d8b654738,git-svn-id: svn://svn.ijw.co.nz/svn/OpenRa@105...,bob,bob@993157c7-ee19-0410-b2c4-bb4e9862e678,2007-06-21 03:20:47+00:00,0,bob,bob@993157c7-ee19-0410-b2c4-bb4e9862e678,2007-06-21 03:20:47+00:00,0,...,409,415,403,8,bleed,"Format40.cs, Format80.cs, ImageDecode.csproj, ...",765c0ac0673c10471f5b7b46a008eb78ffa143b2,0.244792,0.244792,0.942708
