In [55]:
import pandas as pd
import pathlib
import numpy as np

# Read the tab-separated log. The file has no header row, so the column
# names are supplied here.
# NOTE(review): the forward-fill below implies that each commit's metadata
# (sha, timestamp, author) sits on its own line, followed by one line per
# changed file -- confirm against the command that produced git.log.
git_log = pd.read_csv(
    "git.log",
    sep="\t",
    header=None,
    names=[
        'additions',
        'deletions',
        'filename',
        'sha',
        'timestamp',
        'author'])

# Flatten to one row per changed file: copy each commit's metadata down onto
# the rows that follow it, then drop every row that still has a missing value
# (the metadata-only rows have no additions/deletions/filename).
# `.ffill()` is used instead of `fillna(method='ffill')` because the `method`
# keyword of fillna is deprecated in recent pandas releases.
git_log = (
    git_log[['additions', 'deletions', 'filename']]
    .join(git_log[['sha', 'timestamp', 'author']].ffill())
    .dropna()
)

In [56]:
# Turn the commit time (seconds since the epoch) into a DatetimeIndex.
# The guard keeps this cell re-runnable: once 'timestamp' has been moved into
# the index it is no longer a column, so converting it a second time would fail.
if 'timestamp' in git_log.columns:
    git_log = git_log.assign(
        timestamp=pd.to_datetime(git_log['timestamp'], unit='s')
    ).set_index('timestamp')

# File suffix including the leading dot ('' when the path has none).
# The column name keeps its original spelling because later cells select on it.
git_log['extention'] = git_log['filename'].apply(
    lambda path: pathlib.PurePosixPath(path).suffix)

# Rows that carry '-' instead of a number (presumably binary files such as
# images -- confirm) get NaN so the columns can be cast to float below.
for column in ('additions', 'deletions'):
    git_log.loc[git_log[column] == '-', column] = np.nan

# Net line change per row: added minus deleted lines (NaN where unknown).
git_log['line_count'] = (
    git_log['additions'].astype(float) - git_log['deletions'].astype(float))

git_log.head(10)

Unnamed: 0_level_0,additions,deletions,filename,sha,author,extention,line_count
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-02-03 12:56:01,25.0,20.0,12_Datenfusion.md,29d83da,Sebastian Zug,.md,5.0
2020-02-03 09:56:49,22.0,5.0,12_Datenfusion.md,0defabd,Sebastian Zug,.md,17.0
2020-02-03 09:39:02,3.0,1.0,11_Regelungstechnik.md,878b126,Sebastian Zug,.md,2.0
2020-02-03 09:38:43,925.0,0.0,12_Datenfusion.md,9675817,Sebastian Zug,.md,925.0
2020-02-03 09:38:43,,,img/12_Datenfusion/2DBayes.png,9675817,Sebastian Zug,.png,
2020-02-03 09:38:43,,,img/12_Datenfusion/JDLmodell.png,9675817,Sebastian Zug,.png,
2020-02-03 09:38:43,,,img/12_Datenfusion/Nahin.png,9675817,Sebastian Zug,.png,
2020-02-03 09:38:43,,,img/12_Datenfusion/opencv_matching.jpg,9675817,Sebastian Zug,.jpg,
2020-01-27 12:04:13,23.0,23.0,README.md,57aae7f,Sebastian Zug,.md,0.0
2020-01-27 09:54:17,357.0,0.0,11_Regelungstechnik.md,95f65b7,Sebastian Zug,.md,357.0


In [57]:
# Each commit can appear on several rows (one per changed file), so count the
# distinct hashes rather than the rows.
commit_total = len(git_log['sha'].unique())
print("Found {0} commits in project's history".format(commit_total))

Found 112 commits in project's history


In [58]:
# Keep only the Markdown files, ordered from the oldest change to the newest,
# so that the running totals below grow chronologically.
md_commits = git_log[git_log.extention == ".md"].sort_index()

# Running total of net line changes across all Markdown files.
md_commits['line_count_cumsum'] = md_commits.line_count.cumsum().astype('int')

# Running number 1..n of the rows seen so far. Each row is one changed file,
# so a commit that touches several .md files is counted once per file.
# An explicit counter is used instead of `rolling(n).count()`: newer pandas
# releases leave incomplete windows as NaN, which makes every row but the
# last one NaN and breaks the cast to int.
md_commits['commit_count'] = np.arange(1, len(md_commits) + 1)

md_commits.head()

Unnamed: 0_level_0,additions,deletions,filename,sha,author,extention,line_count,line_count_cumsum,commit_count
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2019-10-04 04:34:05,2,0,README.md,018d80b,Sebastian Zug,.md,2.0,2,1
2019-10-04 06:11:12,103,2,README.md,40c4587,Sebastian Zug,.md,101.0,103,2
2019-10-04 06:50:29,0,0,00_Einf\303\274hrung.md => 00_Einfuehrung.md,5e5b3a9,Sebastian Zug,.md,0.0,103,3
2019-10-04 06:53:58,59,2,README.md,3378e2e,Sebastian Zug,.md,57.0,160,4
2019-10-07 12:00:43,35,11,README.md,92607e3,Sebastian Zug,.md,24.0,184,5


In [65]:
# Daily summary of the Markdown changes: net lines changed per calendar day,
# the number of rows recorded on that day, and the running total of lines.
# Days without any change appear with zeros.
view = (
    md_commits
    .resample('D')
    .agg({"line_count": "sum", "commit_count": "count"})
    .assign(line_count_cumsum=lambda daily: daily["line_count"].cumsum().astype('int'))
)
view

Unnamed: 0_level_0,line_count,commit_count,line_count_cumsum
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-10-04,160.0,4,160
2019-10-05,0.0,0,160
2019-10-06,0.0,0,160
2019-10-07,374.0,6,534
2019-10-08,19.0,4,553
2019-10-09,0.0,0,553
2019-10-10,0.0,0,553
2019-10-11,24.0,2,577
2019-10-12,139.0,3,716
2019-10-13,84.0,2,800
