In [1]:
from github2pandas.github2pandas import GitHub2Pandas
from github2pandas.workflows import Workflows
from github2pandas.version import Version
from pathlib import Path
import os
import logging
import pandas as pd

# Evaluate Workflow Data

-----------------------------------------------------------------
This example illustrates the aggregation of workflow (actions) data using the `github2pandas` repository. Here, all workflows are read out, filtered and displayed in terms of success or failure. 

In [2]:
git_repo_name = "github2pandas"
git_repo_owner = "TUBAF-IFI-DiPiT"

data_root_dir = Path("data")
repo_data_dir = Path(data_root_dir, git_repo_owner, git_repo_name)

In [3]:
workflows_path = Path.joinpath(repo_data_dir, Workflows.Files.DATA_DIR)
df_workflows = GitHub2Pandas.get_pandas_data_frame(workflows_path, Workflows.Files.WORKFLOWS)
df_runs = GitHub2Pandas.get_pandas_data_frame(workflows_path, Workflows.Files.RUNS)

In [4]:
df_workflows

Unnamed: 0,id,name,created_at,updated_at,state
0,6245620,RunTests,2021-02-28 17:31:08,2021-03-08 08:26:16,active


In [5]:
df_runs.head(5)

Unnamed: 0,workflow_id,id,commit_sha,pull_requests,state,event,conclusion,created_at,updated_at
0,6245620,1812008875,32c5932aeb2195a3c2574c5e54ddc3c8cbe5243a,[],completed,push,success,2022-02-08 11:36:49,2022-02-08 11:47:26
1,6245620,1811719898,60d0b12d14d3dc7ea7da87482973d933dbf9515e,[],completed,push,failure,2022-02-08 10:34:29,2022-02-08 10:36:40
2,6245620,1811649706,918357abd12926b6c7f1c674d06be50b77da7cce,[],completed,push,failure,2022-02-08 10:19:35,2022-02-08 10:21:44
3,6245620,1767867041,bf7b3c79b93930fbdf4d1f5f58312fda43bbe723,[],completed,push,success,2022-01-30 05:59:23,2022-01-30 06:09:06
4,6245620,1760887907,b8b66891dc0ce1888af5ca83aea778ddcf92e752,[],completed,push,success,2022-01-28 09:52:05,2022-01-28 10:01:31


## Examples
### Question 1: Do we generate more failures while working on weekend?

In [6]:
df_runs['day_of_week'] = df_runs['created_at'].dt.day_name()

results = df_runs.groupby(['day_of_week', 'conclusion']).commit_sha.count().unstack()
results['ratio'] = results['failure'] / (results['failure'] + results['success'])

results.sort_values(by=['ratio'])

conclusion,failure,success,ratio
day_of_week,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Friday,7.0,17.0,0.291667
Thursday,11.0,19.0,0.366667
Monday,17.0,24.0,0.414634
Tuesday,13.0,9.0,0.590909
Saturday,3.0,2.0,0.6
Wednesday,14.0,8.0,0.636364
Sunday,,8.0,


_Result: Best results on Fridays and no failures on Sundays! Impressive!_

### Question 2: What happens during the workflow run?

Let's take a closer view on log files of a specific Action run.

In [7]:
df_runs.iloc[2]

workflow_id                                       6245620
id                                             1811649706
commit_sha       918357abd12926b6c7f1c674d06be50b77da7cce
pull_requests                                          []
state                                           completed
event                                                push
conclusion                                        failure
created_at                            2022-02-08 10:19:35
updated_at                            2022-02-08 10:21:44
day_of_week                                       Tuesday
Name: 2, dtype: object

In [8]:
workflow_run_id = df_runs.iloc[2].id

In [9]:
github_token = os.environ['GITHUB_API_TOKEN']
log_level = logging.DEBUG
github2pandas = GitHub2Pandas(github_token, data_root_dir, log_level=log_level)
repo = github2pandas.get_repo(git_repo_owner, git_repo_name)

In [11]:
Workflows.download_workflow_log_files(repo=repo,
                                  github_token=github_token,
                                  workflow_run_id=workflow_run_id,
                                  data_root_dir=data_root_dir)

10

The workflow logs are stored in the data folder of the project now.

### Questions 3: Check who prepared the workflows

For this request we have to merge Version data with Workflow information. 

1. Prepare commit, edits and workflow dataframes
2. Extract commits adressing workflow-folder `.github/workflow/` from edits
3. Identify authors integrating workflows

In [31]:
versions_path = Path.joinpath(repo_data_dir, Version.Files.DATA_DIR)
df_edits = GitHub2Pandas.get_pandas_data_frame(versions_path, Version.Files.EDITS)
df_commits = GitHub2Pandas.get_pandas_data_frame(versions_path, Version.Files.COMMITS)

In [24]:
relevant_commits = df_edits[df_edits["new_path"].str.contains(".github/workflows/", na=False)][['commit_sha', 'filename']]
relevant_commits.drop_duplicates(inplace = True)
relevant_commits

Unnamed: 0,commit_sha,filename
941,540e6691a42db08b1dc94948b418fa1ea1b7380d,python_package.yml
970,cf2d4124846de842f24ae1deae56a07ab7ab007b,python_package.yml
974,daa70ee1d1cedc8d51e06bdf88ed655bbedc75bf,python_package.yml
986,1bda3b56508bc47073def8d58ed41163fd0b9dcd,python_package.yml
1033,3c43e5af6be062bada6a5e66b4f9503a7aa8a369,python_package.yml
1434,d69b074d24e4419534867ce75578952f1a8ab5d3,python_package.yml
1437,cc01c8a7ec6d3bb3c647acfb627d7a576cac9e77,python_package.yml
3961,b4fdab74ec4504b6caa7c702b86f1d790a1de17a,python_package.yml
7230,0d1e5ad84f4737bb716aa0b718f9b8a8fd1fabb3,python_package.yml
88046,2143785e1b7e2e1162f6406acb4706b00ef787b4,python_package.yml


In [46]:
pd.merge(relevant_commits, 
         df_commits[['author', 'commit_message', 'commit_sha', 'commited_at']],
         how="left", left_on = "commit_sha", right_on = "commit_sha")\
         [['author', 'commit_message', 'commited_at']]

Unnamed: 0,author,commit_message,commited_at
0,take-certain-word-end,Create python_package.yml,2021-02-28 17:31:08
1,take-certain-word-end,Add automated testing,2021-02-28 17:37:26
2,take-certain-word-end,Add github token variable to tests,2021-02-28 17:51:27
3,take-certain-word-end,Exclude private repositories from tests,2021-02-28 18:12:38
4,take-certain-word-end,Integrate commit processing,2021-02-28 20:00:33
5,take-certain-word-end,Update python_package.yml,2021-03-02 08:29:34
6,take-certain-word-end,Update python_package.yml,2021-03-02 08:31:39
7,take-certain-word-end,Update python_package.yml\n\nReplace old test ...,2021-03-08 08:26:15
8,love-go-past-name,edit tests\n\nadd bug-fix/tests to auto test run,2021-03-15 10:31:00
9,love-go-past-name,add v2.0.0 to workflow,2022-01-25 07:42:49
