```
pip install pandas parsegitlog plotly
```

In [None]:
import os
import pandas as pd
import parsegitlog

import plotly.express as px
pd.options.display.max_columns = 25

In [None]:
# Capstone teams and the GitHub repository each team commits to.
df = pd.DataFrame(
    [
        ('Songs', 'https://github.com/CBRJ-Lyrical-Metrics/song-lyrics-capstone'),
        ('Financials', 'https://github.com/FinancialForecasters/financial_forecaster'),
        ('Customers', 'https://github.com/InPersonAnalysis/customer_review_capstone'),
        ('Movies', 'https://github.com/Movie-Success-Capstone/Movie-Capstone'),
        ('Austin', 'https://github.com/austin-crime/austin-crime'),
    ],
    columns=['Team', 'Github'],
)

In [None]:
# Clones or pulls the repo for each team
def update_team_repo_and_get_commits(team):
    """Clone (or update) one team's repository and return its commits.

    Parameters
    ----------
    team : dict
        Record with a 'Team' key (used as the directory name under ``repos/``)
        and a 'Github' key (the URL to clone from).

    Returns
    -------
    list of dict
        The commits reported by ``parsegitlog.get_commits`` for the local
        checkout, each extended with a 'team' key holding the team name.

    Raises
    ------
    subprocess.CalledProcessError
        If the repository does not exist locally and ``git clone`` fails.
    """
    # Local import so this cell does not depend on an edit to the import cell.
    import subprocess

    name = team["Team"]
    repo_dir = f'repos/{name}'
    print(f'Processing {name}')

    # Commands are passed as argument lists (no shell), so names or URLs
    # containing spaces or quotes cannot break or alter the command.
    if os.path.exists(repo_dir):
        # The default branch may be called either main or master; try both.
        for branch in ('main', 'master'):
            cmd = ['git', '-C', repo_dir, 'pull', 'origin', branch]
            print(f'  {" ".join(cmd)}')
            if subprocess.run(cmd).returncode == 0:
                break
        else:
            # Best effort: the existing checkout is still usable, just stale.
            print(f'  WARNING: could not pull {name}; using the existing checkout')
    else:
        cmd = ['git', 'clone', team["Github"], repo_dir]
        print(f'  {" ".join(cmd)}')
        # Without a checkout there is nothing to parse, so fail loudly here.
        subprocess.run(cmd, check=True)

    commits = parsegitlog.get_commits(repo_dir)
    return [{**c, 'team': name} for c in commits]

# NOTE: `df` holds the team table on entry and is rebound to the commit table
# below, so this cell can only be re-run after re-running the cell that builds
# the team table.
commits = [
    commit
    for team in df.to_dict('records')
    for commit in update_team_repo_and_get_commits(team)
]

df = pd.DataFrame(commits)

# Parse timestamps as UTC (handles mixed offsets), then express them in the
# local working timezone so hour-of-day comparisons are meaningful.
df['authored_at'] = pd.to_datetime(df.authored_at, utc=True).dt.tz_convert('America/Chicago')
df['committed_at'] = pd.to_datetime(df.committed_at, utc=True).dt.tz_convert('America/Chicago')

# Remove merge commits (more than one space-separated parent). The explicit
# copy makes `df` an independent frame, so the column assignments below do not
# trigger SettingWithCopyWarning.
df = df[df.parents.str.count(' ') == 0].copy()

days_off = ['2022-05-27', '2022-05-28', '2022-05-29', '2022-05-30', '2022-06-04', '2022-06-05']
df['is_on_day_off'] = df.authored_at.dt.strftime('%Y-%m-%d').isin(days_off)
# On a working day, "after hours" is before 09:00 or from 18:00 onwards.
df['after_hours'] = (~df.is_on_day_off) & ((df.authored_at.dt.hour < 9) | (df.authored_at.dt.hour > 17))
df['outside_working_hours'] = df.after_hours | df.is_on_day_off

In [None]:
# df = df[df.authored_at.dt.strftime('%Y-%m-%d') < pd.to_datetime('2022-06-03').strftime('%Y-%m-%d')]

In [None]:
def fill_missing_days(df, start_date='2022-05-24', fmt='%m/%d %a', end_date='now'):
    """Add zero-filled columns for days that are missing from a crosstab.

    Parameters
    ----------
    df : pandas.DataFrame
        Crosstab whose *column* labels are date strings formatted with ``fmt``.
    start_date : str or datetime-like
        First day of the range to fill.
    fmt : str
        ``strftime`` format used for the date labels; must match the labels
        already present in ``df``.
    end_date : str or datetime-like
        Last day of the range to fill. Defaults to 'now'; pass a fixed date
        to get output that does not depend on when the notebook is run.

    Returns
    -------
    pandas.DataFrame
        Same rows as ``df``, with one column per day in the range (plus any
        labels already present), missing cells set to 0 and values cast to int.
        Columns are sorted as text, which is chronological only for formats
        that sort that way (the default does within a single year).
    """
    t = df.transpose()
    all_days = pd.date_range(start=start_date, end=end_date, freq='D').strftime(fmt)
    # Joining against a throwaway all-zero column forces every day into the index.
    days = pd.Series(0, index=all_days, name='dummy')
    t = pd.concat([t, days], axis=1).sort_index()
    t = t.drop(columns='dummy').transpose()
    t = t.fillna(0).astype(int)
    return t

In [None]:
def categorize_hour(hour):
    """Return the labelled time-of-day bucket for an hour of the day.

    The numeric prefix on each label keeps the buckets in chronological order
    when they are sorted as text.
    """
    if hour < 9:
        return '1 early morning (00:00 - 8:59)'
    if hour < 13:
        return '2 morning (9:00 - 12:59)'
    if hour <= 17:
        return '3 afternoon (13:00 - 17:59)'
    return '4 evening (18:00 - 23:59)'

# Bucket each commit by time of day and label it with its calendar day.
df['hour'] = df.authored_at.dt.hour.apply(categorize_hour)
df['date'] = df.authored_at.dt.strftime('%m/%d %a')

In [None]:
# ctab = pd.concat([pd.crosstab(subset.hour, subset.day).assign(team=team) for team, subset in df.groupby('team')])
# Commit counts: time-of-day bucket (rows) by calendar day (columns).
ctab = pd.crosstab(index=df.hour, columns=df.date)

fig = px.imshow(ctab, color_continuous_scale=['white', 'green'])
fig.update_layout(xaxis_title='', yaxis_title='')
fig

In [None]:
# Share of each team's commits that falls in each time-of-day bucket
# (every column sums to 1).
ctab = pd.crosstab(index=df.hour, columns=df.team)
ctab = ctab / ctab.sum()
px.imshow(ctab, color_continuous_scale=['white', 'green'])

In [None]:
# Daily commit counts, one column (and one line) per team.
t = (
    df.set_index(df.authored_at)
    .groupby('team')
    .resample('D')
    .size()
    .unstack(0)
)
px.line(t)

In [None]:
# Long-format commit counts per day and team, drawn as one bar panel per team.
t = (
    pd.crosstab(df.date, df.team)
    .reset_index()
    .melt(id_vars='date', value_name='n_commits')
)
px.bar(
    t,
    x='date',
    y='n_commits',
    facet_col='team',
    facet_col_wrap=3,
    title='# of Innis Capstone Team Commits',
)

In [None]:
# Long-format commit counts per day, team and author; zero rows are dropped.
t = (
    pd.crosstab([df.date, df.team], df.author_name)
    .reset_index()
    .melt(id_vars=['date', 'team'], value_name='n_commits')
    .sort_values(by=['team', 'author_name'])
    .reset_index(drop=True)
    .query('n_commits > 0')
)

# Panel order: authors grouped team by team; days in sorted label order.
orders = {
    'author_name': [
        author
        for authors in t.groupby('team').apply(lambda subset: subset.author_name.unique().tolist()).tolist()
        for author in authors
    ],
    'date': sorted(t.date.unique()),
}
px.bar(
    t,
    x='date',
    y='n_commits',
    color='team',
    facet_col='author_name',
    facet_col_wrap=4,
    height=800,
    category_orders=orders,
)

In [None]:
# Fraction of each author's commits made outside working hours, ascending.
df.groupby('author_name')['outside_working_hours'].mean().sort_values()

In [None]:
# Commits per author for every hour of the day. Joining against an all-zero
# column indexed 0-23 adds the hours in which nobody committed.
t = (
    pd.concat(
        [
            pd.crosstab(df.author_name, df.authored_at.dt.hour).transpose(),
            pd.Series(0, index=range(24), name='dummy'),
        ],
        axis=1,
    )
    .drop(columns='dummy')
    .transpose()
    .fillna(0)
    .astype(int)
)
t.style.background_gradient(cmap='Reds', axis=1)

In [None]:
# Number of commits per (team, author, day), largest first.
(
    df.value_counts(subset=['team', 'author_name', 'date'])
    .rename('n_commits')
    .reset_index()
)

In [None]:
# Share of each author's commits per time-of-day bucket (every row sums to 1).
ctab = pd.crosstab(index=df.author_name, columns=df.hour)
ctab = ctab.div(ctab.sum(axis=1), axis=0)
ctab.style.background_gradient(cmap='Blues', axis=1)

In [None]:
# Raw commit counts per author and time-of-day bucket; the highlighted cell in
# each column marks the author with the most commits in that bucket.
ctab = pd.crosstab(index=df.author_name, columns=df.hour)
ctab.style.highlight_max(axis=0)

In [None]:
from jupyter_dash import JupyterDash
from dash import dcc, html, Input, Output

# Figure shown before any selection is made: all teams combined.
ctab = pd.crosstab(index=df.hour, columns=df.date)
fig = px.imshow(ctab, title='All', color_continuous_scale=['white', 'green'])

# Layout: a debug text area, a team selector (every team plus 'All') and the heatmap.
app = JupyterDash('time_of_day_heatmap')
app.layout = html.Div(
    children=[
        html.Pre(children=['Test'], id='debug'),
        dcc.Dropdown(
            id='dropdown',
            options=[*df.team.unique().tolist(), 'All'],
            value='All',
        ),
        dcc.Graph(id='fig', figure=fig),
    ]
)

@app.callback(Output('debug', 'children'), Output('fig', 'figure'), Input('dropdown', 'value'))
def onupdate(selected_team):
    """Rebuild the time-of-day heatmap for the team chosen in the dropdown.

    Parameters
    ----------
    selected_team : str or None
        A team name, 'All' for every team, or None when the dropdown has been
        cleared (treated the same as 'All').

    Returns
    -------
    tuple
        The text for the debug element and the new heatmap figure.
    """
    # A cleared dropdown reports None; without this the filter below would
    # match no rows and produce an empty heatmap.
    if selected_team is None:
        selected_team = 'All'

    if selected_team == 'All':
        # Nothing below mutates the frame, so no copy is needed.
        subset = df
    else:
        subset = df.query('team == @selected_team')
    ctab = pd.crosstab(subset.hour, subset.date)
    # Keep the x-axis comparable across teams by showing days with no commits too.
    ctab = fill_missing_days(ctab)
    f = px.imshow(ctab, color_continuous_scale=['white', 'green'], title=selected_team)
    return 'test', f


# Start the Dash app; mode='inline' presumably renders it inside this cell's
# output instead of opening a separate page - confirm against the JupyterDash docs.
app.run_server(mode='inline')