In [1]:
from pathlib import Path
from src.github_miner.linker.transformers import GitHubProjectTransformer
from src.common.models import GitProject, Issue
from src.inspector_git.linker.transformers import GitProjectTransformer
from src.inspector_git.reader.iglog.readers.ig_log_reader import IGLogReader
from src.run_blame import check_blame
import sys
import pickle
from src.jira_miner.reader_dto.loader import JiraJsonLoader
from src.jira_miner.linker.transformers import JiraProjectTransformer
from src.github_miner.reader_dto.loader import GithubJsonLoader
from src.github_miner.linker.transformers import GitHubProjectTransformer

# sys.setrecursionlimit(16336)


def build_and_check_project(iglog_path: str, repo_path: str, compute_annotated_lines: bool) -> GitProject:
    """Build a ``GitProject`` from an iglog file and optionally run the blame check.

    Args:
        iglog_path: Location of the iglog file. The file is read as UTF-8 and
            its stem (file name without extension) becomes the project name.
        repo_path: Forwarded to ``check_blame``; only used when
            ``compute_annotated_lines`` is true.
        compute_annotated_lines: Forwarded to ``GitProjectTransformer``. When
            true, ``check_blame`` is additionally run on the transformed project.

    Returns:
        The project returned by ``GitProjectTransformer.transform()``.
    """
    log_path = Path(iglog_path)

    # Parse the log while the file is open; the transformer only needs the DTO.
    with log_path.open("r", encoding="utf-8") as log_stream:
        parsed_log = IGLogReader().read(log_stream)

    project = GitProjectTransformer(
        parsed_log,
        name=log_path.stem,
        compute_annotated_lines=compute_annotated_lines,
    ).transform()

    # Without annotated lines there is nothing for the blame check to verify.
    if not compute_annotated_lines:
        return project

    check_blame(project, repo_path)
    return project

# --- Git project -------------------------------------------------------------
# Built from the Zeppelin iglog. compute_annotated_lines is False, so
# build_and_check_project does not call check_blame and repo_path is unused here.
# NOTE(review): repo_path is a machine-specific absolute path; consider making
# it configurable before enabling the blame check on another machine.
git_project = build_and_check_project(
    "../../test-input/inspector-git/zeppelin.iglog",
    "/home/vortex/Work/BachelorThesis/voyager-target/test_inspector_git_repo/TestInspectorGitRepo",
    False
)

# --- Jira project ------------------------------------------------------------
# Load the Jira JSON export and transform it into a project model.
path_jira = "../../test-input/jira-miner/ZEPPELIN-detailed-issues.json"

jira_loader = JiraJsonLoader(path_jira)
jira_data = jira_loader.load()

jira_transformer = JiraProjectTransformer(jira_data, name="Jira Project")
jira_project = jira_transformer.transform()

# --- GitHub project ----------------------------------------------------------
# Same load-then-transform sequence for the GitHub JSON export.
path_github = "../../test-input/github-miner/githubProject.json"

github_loader = GithubJsonLoader(path_github)
github_data = github_loader.load()

github_transformer = GitHubProjectTransformer(github_data, name="GitHub Project")
github_project = github_transformer.transform()


from src.common.project_linkers import ProjectLinker

# --- Linking -----------------------------------------------------------------
# Link the three projects pairwise.
# NOTE(review): only the first call passes a third argument (jira_data); the
# other two pass just the project pair. Confirm this asymmetry is intended.
ProjectLinker.link_projects(github_project, jira_project, jira_data)
ProjectLinker.link_projects(jira_project, git_project)
ProjectLinker.link_projects(github_project, git_project)

# Single lookup table for the linked projects; later cells read from it by key.
graph_data = {
        "git": git_project,
        "jira": jira_project,
        "github": github_project,
    }

In [3]:
# 1. What are the top 5 most modified files and how many times were they modified?
from collections import Counter

# Number of recorded changes per file, keyed by the file's last existing name.
file_counter = Counter()
git_project = graph_data["git"]

for file in git_project.file_registry.all:
    fname = file.last_existing_name()
    # Accumulate rather than assign: if two registry entries resolve to the same
    # last existing name, a plain assignment keeps only whichever entry was
    # iterated last and silently drops the other's changes. Summing makes the
    # per-name total independent of iteration order.
    file_counter[fname] += len(file.changes)

top_5_files = file_counter.most_common(5)

print("The 5 most modified files (with number of modifications):")
for fname, count in top_5_files:
    print(f"  {fname}: {count} modifications")

The 5 most modified files (with number of modifications):
  pom.xml: 522 modifications
  zeppelin-web/src/app/notebook/paragraph/paragraph.controller.js: 465 modifications
  zeppelin-interpreter/src/main/java/org/apache/zeppelin/conf/ZeppelinConfiguration.java: 355 modifications
  zeppelin-server/src/main/java/org/apache/zeppelin/socket/NotebookServer.java: 332 modifications
  spark/interpreter/src/main/java/org/apache/zeppelin/spark/OldSparkInterpreter.java: 274 modifications


In [5]:
# 2. Which user contributed to the most Jira issues?
from collections import defaultdict

git_project = graph_data["git"]

# Maps each git account to the distinct issues reachable through its commits.
user_issue_count = defaultdict(set)

for account in git_project.account_registry.all:
    linked_issues = {
        issue
        for commit in account.commits
        for issue in commit.issues
    }
    # Only accounts with at least one linked issue get an entry, so the
    # "no user found" branch below still triggers when nothing is linked.
    if linked_issues:
        user_issue_count[account] |= linked_issues

# Pick the account with the largest issue set; None when the mapping is empty.
most_contributing_user = max(
    user_issue_count.items(),
    key=lambda entry: len(entry[1]),
    default=None,
)

if most_contributing_user is None:
    print("No user found with associated issues.")
else:
    user, issues = most_contributing_user
    print(f"User {user.git_id} contributed to {len(issues)} Jira issues (the most).")

User Jeff Zhang <zjffdu@apache.org> contributed to 995 Jira issues (the most).


In [8]:
# 3. What are the top 5 "bug magnet" files?
# A file scores one point for every change it has in a commit linked to an
# issue whose type is named "bug" (compared case-insensitively).

from collections import Counter

bug_file_counter = Counter()

for issue in jira_project.issue_registry.all:
    for issue_type in issue.issue_types:
        # Skip every issue type that is not a bug.
        if issue_type.name.lower() != "bug":
            continue
        # Count each changed file once per change, keyed by its last existing name.
        bug_file_counter.update(
            change.file.last_existing_name()
            for commit in issue.git_commits
            for change in commit.changes
        )

top_5_bug_magnets = bug_file_counter.most_common(5)
print("Top 5 fișiere bug magnets:")
for fname, count in top_5_bug_magnets:
    print(f"  {fname}: {count} apariții în commituri asociate cu bug-uri")


Top 5 fișiere bug magnets:
  zeppelin-web/src/app/notebook/paragraph/paragraph.controller.js: 94 apariții în commituri asociate cu bug-uri
  zeppelin-server/src/main/java/org/apache/zeppelin/socket/NotebookServer.java: 90 apariții în commituri asociate cu bug-uri
  zeppelin-zengine/src/main/java/org/apache/zeppelin/notebook/Note.java: 69 apariții în commituri asociate cu bug-uri
  pom.xml: 66 apariții în commituri asociate cu bug-uri
  zeppelin-web/src/app/notebook/notebook.controller.js: 61 apariții în commituri asociate cu bug-uri
