# Comprehensive aggregation of GitHub activities

-----------------------------------------------------------------

This example shows how to aggregate issue, pull-request and workflow activities as well as the complete version history.

In [None]:
from github2pandas.issues import Issues
from github2pandas.pull_requests import PullRequests
from github2pandas.version import Version
from github2pandas.workflows import Workflows
from github2pandas.utility import Utility
from pathlib import Path
import threading
import time
import os
from datetime import timedelta

## Basic Usage

The most important input parameter is a Repository object from the PyGithub package.

In [None]:
# Repository to analyse, identified by its owner and its name on GitHub.
git_repo_owner = "TUBAF-IFI-DiPiT"
git_repo_name = "github2pandas"

# Folder handed to every github2pandas call below as the data root directory.
default_data_folder = Path("data") / git_repo_name

# The token is read from the environment variable GITHUB_API_TOKEN.
# If the token is not provided that way (e.g. via .env), it is necessary to
# set it here directly instead:
# github_token = "yourToken"
github_token = os.environ["GITHUB_API_TOKEN"]

# Repository object that the aggregation calls below operate on.
repo = Utility.get_repo(
    git_repo_owner,
    git_repo_name,
    github_token,
    default_data_folder,
)

## Aggregation

In [None]:
def _aggregate_version():
    """Clone the repository, then generate the version tables."""
    Version.clone_repository(
        repo=repo,
        data_root_dir=default_data_folder,
        github_token=github_token,
    )
    # Presumably the number of worker processes used by the generation step
    # below; the attribute name is spelled exactly as this notebook sets it.
    Version.no_of_proceses = 8
    Version.generate_version_pandas_tables(
        repo=repo,
        data_root_dir=default_data_folder,
    )


# (printed label, zero-argument callable) pairs, executed in the listed order.
aggregation_steps = (
    (
        "Issues",
        lambda: Issues.generate_issue_pandas_tables(repo, default_data_folder),
    ),
    (
        "Pull Requests",
        lambda: PullRequests.generate_pull_request_pandas_tables(repo, default_data_folder),
    ),
    (
        "Version",
        _aggregate_version,
    ),
    (
        "Workflows",
        lambda: Workflows.generate_workflow_pandas_tables(repo=repo, data_root_dir=default_data_folder),
    ),
)

# Run every step and report its wall-clock duration in seconds.
for step_label, run_step in aggregation_steps:
    print(step_label)
    start_time = time.time()
    run_step()
    print("Time: %.3fs" % (time.time() - start_time))

## Access Pandas DataFrames

In [None]:
# Load the previously generated tables from the data folder, grouped by the
# github2pandas module that provides them.

# Users (the table carries an "anonym_uuid" column that is printed further below).
users = Utility.get_users(default_data_folder)

# Issues and pull requests.
pdIssues = Issues.get_issues(default_data_folder)
pdPull_requests = PullRequests.get_pull_requests(default_data_folder)

# Workflows: the default table plus the table selected by WORKFLOWS_RUNS.
pdWorkflows = Workflows.get_workflows(default_data_folder)
pdRuns = Workflows.get_workflows(
    default_data_folder,
    filename=Workflows.WORKFLOWS_RUNS,
)

# Version history: the default table plus the tables selected by
# VERSION_EDITS and VERSION_BRANCHES.
pdCommits = Version.get_version(default_data_folder)
pdEdits = Version.get_version(default_data_folder, Version.VERSION_EDITS)
pdBranches = Version.get_version(
    default_data_folder,
    filename=Version.VERSION_BRANCHES,
)

## Project Overview

### Project Name and Owner

In [None]:
# get_repo_informations yields the owner first and the repository name second.
repo_informations = Utility.get_repo_informations(default_data_folder)
project_owner, project_name = repo_informations

print(f"Owner: {project_owner}")
print(f"Name: {project_name}")

### Project users (count and anonymized names)

In [None]:
# Print the number of users, but only when the user table has rows.
if not users.empty:
    users_count = len(users)
    print("%d Users" % users_count)

# List every user by table index together with its "anonym_uuid" value.
# On an empty table the loop body simply never runs.
for user_index, user_row in users.iterrows():
    anonym_uuid = user_row["anonym_uuid"]
    print("%d: " % user_index + anonym_uuid)

### Project duration (days)

In [None]:
# Project duration = distance between the earliest and the latest value in the
# "commited_at" column (column name spelled as in the data).
commit_times = pdCommits["commited_at"]
first_commit = commit_times.min()
last_commit = commit_times.max()
span = last_commit - first_commit
print(f"Project active for {span.days} days")

### Do Branches, Pull Requests and Issues exist in the repository?

In [None]:
# Report the number of aggregated issues. If the local table is empty, fetch
# the issues once more before concluding that the repository has none.
if not pdIssues.empty:
    print("There are %d Issues in this Repository" % len(pdIssues))
else:
    print("There are no Issues. Look up if there are Issues")
    repo = Utility.get_repo(
        git_repo_owner,
        git_repo_name,
        github_token,
        default_data_folder,
    )
    Issues.generate_issue_pandas_tables(repo, default_data_folder)
    pdIssues = Issues.get_issues(default_data_folder)
    if pdIssues.empty:
        print("There are no Issues in this Repository")

# Report the number of aggregated pull requests. If the local table is empty,
# fetch the pull requests once more before concluding that the repository has
# none.
# Both emptiness checks must inspect pdPull_requests; checking pdIssues here
# would make the pull-request report depend on the issue table.
if pdPull_requests.empty:
    print("There are no Pull Requests. Look up if there are Pull Requests")
    repo = Utility.get_repo(git_repo_owner, git_repo_name, github_token, default_data_folder)
    PullRequests.generate_pull_request_pandas_tables(repo, default_data_folder)
    # Reload the table so that the second check sees the freshly generated data.
    pdPull_requests = PullRequests.get_pull_requests(default_data_folder)
    if pdPull_requests.empty:
        print("There are no Pull Requests in this Repository")
else:
    print("There are %d Pull Requests in this Repository" % pdPull_requests.shape[0])

# Report how many branches the branch table contains.
if not pdBranches.empty:
    print("There are %d Branches in this Repository" % len(pdBranches))
else:
    print("There are no Branches in this Repository.")