# Test Pull Request Data Aggregation

-----------------------------------------------------------------
This example illustrates the aggregation of pull request data using the github2pandas repository. Here, all pull requests are read out, together with their comments, reactions, reviews and events.

In [1]:
import os
from pathlib import Path

from github2pandas.aggregation.pull_requests import AggPullRequest as AggPR
from github2pandas.utility import Utility

## Basic Usage

The most important input parameter is a Repository object from the PyGithub package.

In [2]:
# Repository to analyse.
git_repo_name = "Extract_Git_Activities"
# NOTE(review): the owner is defined here but is not passed to
# Utility.get_repo below - confirm whether the lookup should be owner-qualified.
git_repo_owner = "TUBAF-IFI-DiPiT"

# Folder that the data-folder arguments of the following cells refer to.
default_data_folder = Path("data", git_repo_name)

# Read the GitHub token from the TOKEN environment variable (e.g. provided via .env).
github_token = os.environ.get("TOKEN")
# If you do not include your GitHub token in .env, it is necessary to set it here.
# github_token = "yourToken"

# Fail early with an actionable message instead of a bare KeyError('TOKEN').
if not github_token:
    raise RuntimeError(
        "No GitHub token found: set the TOKEN environment variable "
        "or assign github_token in this cell."
    )

repo = Utility.get_repo(git_repo_name, github_token)

The following code snippet generates a raw data set based on the repo information.

In [3]:
# Generate the pull request tables for `repo`, passing the data folder defined above.
# The recorded output of this cell is the call's return value (True).
# NOTE(review): presumably the tables are stored under default_data_folder - confirm.
AggPR.generate_pull_request_pandas_tables(repo, default_data_folder)

True

In [4]:
# Fetch the raw pull request table for the data folder and preview its first five rows.
pull_requests = AggPR.get_raw_pull_requests(
    default_data_folder,
    AggPR.PULL_REQUESTS,
)
pull_requests.head(5)

Unnamed: 0,id,assignees,assignees_count,body,title,changed_files,closed_at,created_at,deletions,additions,...,labels_count,merged,merged_at,merged_by,state,updated_at,author,comments_count,issue_events_count,reviews_count
0,591555338,,0,add models,Enhancement/table models,6,NaT,2021-03-12 10:26:51,88,519,...,0,False,NaT,,open,2021-03-12 10:27:04,6d1b7901-9635-4705-a7f5-050593c619dc,0,2,0
1,590889161,,0,Add anonym user id to data.\r\n\r\nsave user i...,added extraction of user data to a different file,5,2021-03-11 14:54:17,2021-03-11 14:04:04,94,138,...,0,True,2021-03-11 14:54:17,7be26812-dfaa-4724-932c-6a9613761e42,closed,2021-03-12 06:25:47,6d1b7901-9635-4705-a7f5-050593c619dc,0,5,0
2,590769429,,0,Added a class for each module and numpydoc\r\n...,Enhancement/numpydoc,6,2021-03-11 12:14:29,2021-03-11 10:59:43,292,700,...,0,True,2021-03-11 12:14:29,6d1b7901-9635-4705-a7f5-050593c619dc,closed,2021-03-11 12:14:34,6d1b7901-9635-4705-a7f5-050593c619dc,1,7,0
3,587573488,,0,Can you please run the pullrequest test and ta...,make only one pullrequest pandas table,3,2021-03-10 13:39:18,2021-03-09 08:00:06,68,49,...,0,False,NaT,,closed,2021-03-11 06:51:15,6d1b7901-9635-4705-a7f5-050593c619dc,1,3,0
4,586540229,,0,,Add download of workflow log files to notebook,2,2021-03-08 07:45:00,2021-03-08 07:44:53,12,33,...,0,True,2021-03-08 07:45:00,7be26812-dfaa-4724-932c-6a9613761e42,closed,2021-03-08 07:45:01,7be26812-dfaa-4724-932c-6a9613761e42,0,3,0


In [5]:
# Fetch the raw pull request comments table and preview its first five rows.
pull_requests_comments = AggPR.get_raw_pull_requests(
    default_data_folder,
    AggPR.PULL_REQUESTS_COMMENTS,
)
pull_requests_comments.head(5)

Unnamed: 0,pull_request_id,body,created_at,id,author,reactions_count
0,590769429,`data_dir` contains the data folder in general...,2021-03-11 11:13:20,796660636,7be26812-dfaa-4724-932c-6a9613761e42,0
1,587573488,old version is better,2021-03-10 13:39:18,795426070,6d1b7901-9635-4705-a7f5-050593c619dc,0
2,586304530,Can you please check that the tool-chain \r\n\...,2021-03-07 18:44:17,792331459,7be26812-dfaa-4724-932c-6a9613761e42,0
3,585423113,Why do you separate the folder initialization ...,2021-03-07 15:54:10,589049681,7be26812-dfaa-4724-932c-6a9613761e42,0
4,585423113,Is it really necessary to split all data sampl...,2021-03-07 15:59:02,589050378,7be26812-dfaa-4724-932c-6a9613761e42,0


In [6]:
# Fetch the raw pull request reactions table and preview up to five rows.
pull_requests_reactions = AggPR.get_raw_pull_requests(
    default_data_folder,
    AggPR.PULL_REQUESTS_REACTIONS,
)
pull_requests_reactions.head(5)

Unnamed: 0,comment_id,content,created_at,id,author
0,788672723,1,2021-03-03 07:14:32,102774720,6d1b7901-9635-4705-a7f5-050593c619dc


In [7]:
# Fetch the raw pull request reviews table and preview its first five rows.
pull_requests_reviews = AggPR.get_raw_pull_requests(
    default_data_folder,
    AggPR.PULL_REQUESTS_REVIEWS,
)
pull_requests_reviews.head(5)

Unnamed: 0,pull_request_id,id,author,body,state,submitted_at
0,586304530,607076374,6d1b7901-9635-4705-a7f5-050593c619dc,Works well!\r\n\r\nhad to run it in pipenv\r\n...,APPROVED,2021-03-09 06:59:57
1,585423113,605859519,7be26812-dfaa-4724-932c-6a9613761e42,I like the idea of a common class structure co...,COMMENTED,2021-03-07 16:00:51
2,585423113,607062305,6d1b7901-9635-4705-a7f5-050593c619dc,,COMMENTED,2021-03-09 06:42:04
3,585423113,607065238,6d1b7901-9635-4705-a7f5-050593c619dc,,COMMENTED,2021-03-09 06:43:20
4,585423113,607082195,6d1b7901-9635-4705-a7f5-050593c619dc,,COMMENTED,2021-03-09 07:11:13


In [8]:
# Fetch the raw pull request events table and preview its first five rows.
pull_requests_events = AggPR.get_raw_pull_requests(
    default_data_folder,
    AggPR.PULL_REQUESTS_EVENTS,
)
pull_requests_events.head(5)

Unnamed: 0,pull_request_id,author,commit_id,created_at,event,id,assignee,assigner
0,591555338,6d1b7901-9635-4705-a7f5-050593c619dc,,2021-03-12 10:27:04,review_requested,4449938309,,
1,591555338,6d1b7901-9635-4705-a7f5-050593c619dc,,2021-03-12 10:27:12,connected,4449938971,,
2,590889161,6d1b7901-9635-4705-a7f5-050593c619dc,,2021-03-11 14:04:14,review_requested,4445074963,,
3,590889161,7be26812-dfaa-4724-932c-6a9613761e42,07fce6264a5699c5b260d27dda700c9eb82599ce,2021-03-11 14:54:17,referenced,4445456789,,
4,590889161,7be26812-dfaa-4724-932c-6a9613761e42,07fce6264a5699c5b260d27dda700c9eb82599ce,2021-03-11 14:54:17,merged,4445456795,,
