In [53]:
from github2pandas.github2pandas import GitHub2Pandas
from github2pandas.workflows import Workflows
from github2pandas.version import Version
from pathlib import Path
import os
import logging
import pandas as pd

# Evaluate Workflow Data

-----------------------------------------------------------------
This example illustrates how to aggregate workflow (GitHub Actions) data with the `github2pandas` package. All workflows and their runs are loaded, filtered, and summarised in terms of success or failure.

In [54]:
# Repository under analysis, identified by its GitHub owner and name.
git_repo_owner = "TUBAF-IFI-DiPiT"
git_repo_name = "github2pandas"

# Per-repository data directory: <data_root_dir>/<owner>/<repo>.
data_root_dir = Path("data")
repo_data_dir = data_root_dir / git_repo_owner / git_repo_name

In [42]:
# Fetch the workflow and workflow-run tables for the repository.
# NOTE(review): presumably these are read from files written by an earlier
# github2pandas extraction run - confirm the data exists before running.
workflow_files = Workflows.Files
df_workflows = GitHub2Pandas.get_pandas_data_frame(
    repo_data_dir, workflow_files.DATA_DIR, workflow_files.WORKFLOWS
)
df_runs = GitHub2Pandas.get_pandas_data_frame(
    repo_data_dir, workflow_files.DATA_DIR, workflow_files.RUNS
)

In [None]:
# Show the workflows table as this cell's output.
df_workflows

In [None]:
# Preview the first five workflow runs.
df_runs.head(n=5)

## Examples
### Question 1: Do we generate more failures when working on weekends?

In [None]:
# Label every run with the name of the weekday on which it was created.
df_runs['day_of_week'] = df_runs['created_at'].dt.day_name()

# Count runs per weekday and conclusion. `fill_value=0` turns weekday/conclusion
# combinations that never occurred into 0 instead of NaN, so a day without any
# failures gets a ratio of 0.0 rather than NaN.
results = (
    df_runs.groupby(['day_of_week', 'conclusion'])['commit_sha']
    .count()
    .unstack(fill_value=0)
)
# Share of failed runs among the runs that either failed or succeeded.
results['ratio'] = results['failure'] / (results['failure'] + results['success'])

results.sort_values(by=['ratio'])

_Result: Best results on Fridays and no failures on Sundays! Impressive!_

### Question 2: What happens during the workflow run?

Let's take a closer look at the log files of a specific failed Actions run.

In [None]:
# Take the run at position 2 and keep its id for the log download below.
# NOTE(review): nothing here checks that this run actually failed - confirm
# against the `conclusion` value shown in the output.
selected_run = df_runs.iloc[2]
workflow_run_id = selected_run['id']
selected_run

In [56]:
# Logging verbosity handed to GitHub2Pandas.
log_level = logging.DEBUG
# The access token comes from the environment; a missing variable raises KeyError.
github_token = os.environ["GITHUB_API_TOKEN"]

github2pandas = GitHub2Pandas(github_token, data_root_dir, log_level=log_level)
repo = github2pandas.get_repo(git_repo_owner, git_repo_name)

In [None]:
# Download the log files of the selected workflow run.
# NOTE(review): confirm that this class-level call and its keyword names match
# the installed github2pandas version.
Workflows.download_workflow_log_files(
    repo=repo,
    github_token=github_token,
    workflow_run_id=workflow_run_id,
    data_root_dir=repo_data_dir,
)

The workflow logs are now stored in the project's data folder.

In [None]:
# List the log files of the selected run's "extractData" job.
# Built with pathlib from `workflow_run_id` so the cell works on every platform
# and follows the run chosen above instead of a hard-coded run id.
# NOTE(review): assumes logs are stored under <repo_data_dir>/Workflows/<run id>/ - confirm.
run_log_dir = repo_data_dir / "Workflows" / str(workflow_run_id) / "extractData"
sorted(entry.name for entry in run_log_dir.iterdir())

In [None]:
# Print the log of the "Run tests" step for the selected run.
# Built with pathlib from `workflow_run_id` so the cell works on every platform
# and follows the run chosen above instead of a hard-coded run id.
# NOTE(review): assumes logs are stored under <repo_data_dir>/Workflows/<run id>/ - confirm.
test_log_path = (
    repo_data_dir / "Workflows" / str(workflow_run_id) / "extractData" / "6_Run tests.txt"
)
# "utf-8-sig" reads files with or without a leading byte-order mark;
# undecodable bytes are replaced rather than aborting the cell.
print(test_log_path.read_text(encoding="utf-8-sig", errors="replace"))

### Question 3: Who prepared the workflows?

For this request we have to merge Version data with Workflow information. 

1. Prepare commit, edits and workflow dataframes
2. Extract commits addressing the workflow folder `.github/workflows/` from the edits
3. Identify authors integrating workflows

In [50]:
# Fetch the edit and commit tables of the version data for the repository.
version_files = Version.Files
df_edits = GitHub2Pandas.get_pandas_data_frame(
    repo_data_dir, version_files.DATA_DIR, version_files.EDITS
)
df_commits = GitHub2Pandas.get_pandas_data_frame(
    repo_data_dir, version_files.DATA_DIR, version_files.COMMITS
)

In [None]:
# Flag edits whose new path lies inside the workflow folder. `regex=False`
# makes the match literal; by default `str.contains` treats the pattern as a
# regular expression, in which "." matches any character.
touches_workflows = df_edits["new_path"].str.contains(
    ".github/workflows/", na=False, regex=False
)
# Select with `.loc` and chain `drop_duplicates()` instead of `inplace=True`:
# mutating a frame obtained by chained indexing can trigger SettingWithCopyWarning.
relevant_commits = df_edits.loc[touches_workflows, ['commit_sha', 'filename']].drop_duplicates()
relevant_commits

In [None]:
# Attach author, message and timestamp to every workflow-related commit.
# The left join keeps all rows of `relevant_commits`.
commit_details = df_commits[['author', 'commit_message', 'commit_sha', 'commited_at']]
workflow_authors = relevant_commits.merge(commit_details, how="left", on="commit_sha")
workflow_authors[['author', 'commit_message', 'commited_at']]