<a href="https://colab.research.google.com/github/Abidzar16/msr-thesis/blob/main/functions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [37]:
!pip install unidiff gql[all] --quiet

In [38]:
from urllib.request import Request, urlopen
from unidiff import PatchSet
import urllib
import os
import json
import difflib

import asyncio
from gql import gql, Client
from gql.transport.aiohttp import AIOHTTPTransport

## 1. Ambil data affected file dari pull request

In [39]:
def string_to_diff(old_string, new_string):
    """Return the unified diff between two multi-line strings as one string.

    Both inputs are split into lines with their line endings kept, compared
    with ``difflib.unified_diff`` and the resulting diff lines are joined.
    An empty string is returned when the inputs are identical.
    """
    before = old_string.splitlines(keepends=True)
    after = new_string.splitlines(keepends=True)
    return ''.join(difflib.unified_diff(before, after))

In [40]:
# URL template of the GitHub REST endpoint for a pull request; it is filled in
# with `owner`, `repo` and `bug_fixing_pull` via str.format().
endpoint_diff = "https://api.github.com/repos/{owner}/{repo}/pulls/{bug_fixing_pull}.diff"
# Repository the requests are made against (apache/airflow).
airflow_owner = "apache"
airflow_repo = "airflow"
# Accept header requesting GitHub's diff media type.
headers_diff = {"Accept": "application/vnd.github.diff"}

In [41]:
def fetch_diff(pull_sha):
  """Download the diff of a pull request from GitHub and parse it.

  Args:
    pull_sha: Value substituted into the ``bug_fixing_pull`` slot of
      ``endpoint_diff``. Despite the name, the notebook passes a pull
      request number such as "21591".

  Returns:
    The ``PatchSet`` built from the response body.

  Raises:
    urllib.error.URLError: Propagated from ``urlopen`` when the request fails
      (``HTTPError`` for non-success status codes).
  """
  url_diff = endpoint_diff.format(owner=airflow_owner, repo=airflow_repo, bug_fixing_pull=pull_sha)
  req_diff = Request(url_diff, headers=headers_diff)

  # PatchSet reads the whole stream while it is being constructed, so the
  # connection can be released as soon as the constructor returns.
  with urlopen(req_diff) as open_diff:
    # Fall back to UTF-8 when the Content-Type header carries no charset;
    # without an encoding PatchSet would receive undecoded bytes.
    encoding = open_diff.headers.get_content_charset("utf-8")
    return PatchSet(open_diff, encoding=encoding)

In [42]:
def select_relevant_file(patch_diff):
  """Pick the files of a patch that count as affected by the bug-fixing change.

  A file is skipped when it is a Python module whose first path component is
  ``tests`` or when it is a ``.rst`` documentation file. Every other file
  (non-test Python, YAML configuration and any other extension) is kept.

  Args:
    patch_diff: Iterable of patched-file objects exposing a ``path`` attribute.

  Returns:
    List of the kept file paths, in the order they appear in ``patch_diff``.
  """
  affected = []

  for patched_file in patch_diff:
    file_path = patched_file.path
    _, ext = os.path.splitext(file_path)

    is_test_module = ext == ".py" and file_path.split("/")[0] == "tests"
    is_documentation = ext == ".rst"
    if is_test_module or is_documentation:
      continue

    affected.append(file_path)

  return affected

In [43]:
def fetch_affected_line(affected, patch_diff):
  """Collect the changed line numbers of every selected file in a patch.

  Added lines are recorded by their target (post-change) line number and
  removed lines by their source (pre-change) line number; both kinds end up
  in the same list.

  Args:
    affected: Collection of file paths to report on.
    patch_diff: Iterable of patched files; each yields hunks, each hunk yields
      lines exposing ``is_added``, ``is_removed``, ``target_line_no`` and
      ``source_line_no``.

  Returns:
    One ``{"filename": ..., "modified": [...]}`` dict per selected file, with
    ``modified`` holding the distinct line numbers in ascending order.
  """
  affected_lines = []

  for patched_file in patch_diff:
    path_file = patched_file.path
    if path_file not in affected:
      continue

    touched = set()
    for hunk in patched_file:
      for line in hunk:
        if line.is_added:
          touched.add(line.target_line_no)
        elif line.is_removed:
          touched.add(line.source_line_no)

    affected_lines.append({"filename": path_file, "modified": sorted(touched)})

  return affected_lines

In [44]:
# Download and parse the diff of pull request 21591.
fetch = fetch_diff("21591")

In [45]:
# Keep only the changed files that select_relevant_file treats as affected.
affected = select_relevant_file(fetch)

In [46]:
# Collect the changed line numbers of each affected file.
lines = fetch_affected_line(affected, fetch)

In [47]:
lines

[{'filename': 'airflow/www/templates/airflow/trigger.html', 'modified': [66]}]

## 2. Ambil commit berdasarkan pull request

Menghindari self-reference

In [25]:
def fetch_relevant_commit(pull_request_sha):
  """List the SHAs of every commit that belongs to a pull request.

  The GitHub REST endpoint is paginated and returns only 30 commits per
  request by default, so all pages are walked instead of reading just the
  first response.

  Args:
    pull_request_sha: Value substituted into the ``pull_request_sha`` slot of
      the endpoint URL. Despite the name, the notebook passes a pull request
      number such as "21591".

  Returns:
    List of commit SHA strings in the order GitHub returns them.

  Raises:
    urllib.error.URLError: Propagated from ``urlopen`` when a request fails
      (``HTTPError`` for non-success status codes).
  """
  endpoint_commit = "https://api.github.com/repos/{owner}/{repo}/pulls/{pull_request_sha}/commits"
  airflow_owner = "apache"
  airflow_repo = "airflow"
  per_page = 100  # largest page size the GitHub REST API accepts

  url_commit = endpoint_commit.format(owner=airflow_owner, repo=airflow_repo, pull_request_sha=pull_request_sha)

  commit_list = []
  page = 1
  while True:
    req_commit = Request(f"{url_commit}?per_page={per_page}&page={page}")
    # The context manager closes the connection once the body has been read.
    with urlopen(req_commit) as response:
      encoding = response.info().get_content_charset('utf8')
      data = json.loads(response.read().decode(encoding))

    commit_list.extend(commit["sha"] for commit in data)

    # A page that is not full is the last one.
    if len(data) < per_page:
      break
    page += 1

  return commit_list

In [26]:
# List the SHAs of the commits that belong to pull request 21591.
commit_list = fetch_relevant_commit("21591")

In [27]:
commit_list

['7e555de56289fb1142b2e715628ae74a12fcfaff',
 '1258de2534dce4084f0ecd12bffc5de8e40b9326',
 '774d949f7912f4cdaa23fb71331da9a7d483deca']

## 3. Git Blame berdasarkan pull request

gunakan merge_commit_sha untuk pull request

sha PR: "21591" \
merge_commit_sha: "65297673a318660fba76797e50d0c06804dfcafc" \
path: "airflow/www/templates/airflow/trigger.html" \
line_number: 66

In [48]:
# GraphQL query template for the apache/airflow repository, filled in with the
# % operator: the first %s is the git expression of the commit to inspect and
# the second %s is the path of the file to blame. For each of up to the last 10
# parents of that commit it requests the parent's oid and the blame ranges of
# the file (starting/ending line plus the oid and committedDate of the commit
# each range is attributed to).
query_details = """
  query {
    repository(owner: "apache", name: "airflow") {
      object(expression: "%s") {
        ... on Commit {
          parents(last: 10) {
            edges {
              node {
                oid
                blame(path: "%s") {
                  ranges {
                    startingLine
                    endingLine
                    commit {
                      oid
                      committedDate
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }
"""

In [49]:
# Read the GitHub personal access token from the GITHUB_TOKEN environment
# variable instead of hard-coding it in the notebook.
# SECURITY: a token used to be committed in this cell; treat it as leaked and revoke it.
github_token = os.environ.get("GITHUB_TOKEN", "")
if not github_token:
  print("WARNING: GITHUB_TOKEN is not set; GitHub GraphQL requests will be rejected as unauthorized.")

# GraphQL endpoint and the authorization header sent with every request.
url = 'https://api.github.com/graphql'
headers_graphql = {'Authorization': 'Bearer {github_token}'.format(github_token=github_token)}

In [50]:
# aiohttp-based transport that sends requests to GitHub's GraphQL endpoint with the authorization header
transport = AIOHTTPTransport(url="https://api.github.com/graphql",headers=headers_graphql)

# GraphQL client bound to that transport.
# NOTE(review): fetch_schema_from_transport=True presumably makes gql download the schema through the transport before executing queries - confirm against the gql documentation
client = Client(transport=transport, fetch_schema_from_transport=True)

In [51]:
# Merge commit of pull request 21591 and the path of the file whose blame is requested.
merge_commit_sha = "65297673a318660fba76797e50d0c06804dfcafc"
filepath = "airflow/www/templates/airflow/trigger.html"

In [52]:
def _escape_graphql_string(value):
  """Escape ``value`` so it can sit inside a double-quoted GraphQL string literal."""
  # json.dumps escapes quotes, backslashes and control characters with
  # sequences GraphQL also accepts; the surrounding quotes it adds are dropped
  # because the query template already supplies them.
  return json.dumps(str(value), ensure_ascii=False)[1:-1]


async def fetch_blame(commit_sha, path):
  """Run the blame query for one commit and file path.

  Both values are escaped before being substituted into ``query_details`` so
  that a quote or backslash in them cannot break out of the string literal
  and change the query.

  Args:
    commit_sha: Git expression placed in the first slot of ``query_details``.
    path: File path placed in the second slot of ``query_details``.

  Returns:
    The result returned by ``client.execute_async`` for the built query.
  """
  command = query_details % (_escape_graphql_string(commit_sha), _escape_graphql_string(path))
  query = gql(command)
  return await client.execute_async(query)

In [53]:
# Run the blame query for the configured merge commit and file path.
blame = await fetch_blame(merge_commit_sha, filepath)

In [54]:
# blame

In [55]:
def fetch_commit_candidate(affected, result):
  """Return the blame ranges that cover at least one modified line.

  Only the first entry of the commit's ``parents`` edges is inspected.

  Args:
    affected: Dict with a ``modified`` list of line numbers.
    result: Blame query result shaped like
      ``result['repository']['object']['parents']['edges'][i]['node']['blame']['ranges']``.

  Returns:
    List of distinct blame-range dicts whose ``startingLine``..``endingLine``
    interval (inclusive) contains a modified line, in blame order.
  """
  first_parent = result['repository']['object']['parents']['edges'][0]['node']
  blame_ranges = first_parent['blame']['ranges']
  modified_lines = affected['modified']

  matches = [
      rng for rng in blame_ranges
      if any(rng['startingLine'] <= number <= rng['endingLine'] for number in modified_lines)
  ]

  # Drop equal entries, keeping the last occurrence of each.
  return [rng for position, rng in enumerate(matches) if rng not in matches[position + 1:]]

In [58]:
# Find the blame ranges that cover the modified lines of the first affected file.
output_candidate = fetch_commit_candidate(lines[0], blame)

In [59]:
output_candidate

[{'startingLine': 65,
  'endingLine': 67,
  'commit': {'oid': '24d0ecf4ee6dbfd2339847443d4b12033efe9c0f',
   'committedDate': '2020-10-02T14:58:58Z'}}]

In [None]:
## add comment