In [1]:
import pathlib

from pydriller import Repository

In [2]:
# Root directory that holds the checked-out repositories under analysis.
test_module_base_path = pathlib.Path("./test_submodules")

# Repository key -> directory name below `test_module_base_path`.
# NOTE(review): the "dubbo" key points at a directory called "dubboj";
# confirm that this is the real directory name and not a typo.
_repo_dir_names = {
    "seatunnel": "seatunnel",
    "ozone": "ozone",
    "shenyu": "shenyu",
    "dubbo": "dubboj",
    "tomcat": "tomcat",
}

# Repository key -> absolute path of its local checkout, stored as a string.
repos: dict[str, str] = {
    repo_key: str((test_module_base_path / dir_name).absolute())
    for repo_key, dir_name in _repo_dir_names.items()
}

In [3]:
# Pick the repository to analyse and the branch whose history is walked.
# `repo_url` holds one of the absolute local paths stored in `repos`.
repo_url = repos["tomcat"]
in_branch = "main"

# NOTE(review): per PyDriller's documentation traverse_commits() yields the
# commits lazily, so `commits` can presumably be consumed only once; re-run
# this cell before iterating over it a second time.
repository = Repository(repo_url, only_in_branch=in_branch)
commits = repository.traverse_commits()

In [4]:
def is_test_file(filename: str) -> bool:
    """Return True when `filename` ends with ".java" and contains "Test".

    The check is case-sensitive and matches "Test" anywhere in the name,
    not only as a prefix or suffix.
    """
    if not filename.endswith(".java"):
        return False
    return "Test" in filename

def is_class_file(filename: str) -> bool:
    """Return True when `filename` ends with ".java" and lacks "Test".

    This is the complement of the test-file heuristic among Java sources:
    any ".java" name without the case-sensitive substring "Test".
    """
    is_java_source = filename.endswith(".java")
    return is_java_source and "Test" not in filename

def get_class_file_name(test_filename: str) -> str:
    """Derive the expected class file name from a test file name.

    Every occurrence of the substring "Test" is removed, wherever it
    appears, e.g. "FooTest.java" and "TestFoo.java" both give "Foo.java".
    """
    marker = "Test"
    return test_filename.replace(marker, "")

def get_test_file_name_v1(class_filename: str) -> str:
    """Build the test file name for the convention where "Test" is a suffix.

    Each ".java" in the name is replaced by "Test.java", e.g.
    "Foo.java" becomes "FooTest.java".
    """
    java_ext = ".java"
    return class_filename.replace(java_ext, "Test" + java_ext)

def get_test_file_name_v2(class_filename: str) -> str:
    """Build the test file name for the convention where "Test" is a prefix.

    For example, "Foo.java" becomes "TestFoo.java".
    """
    prefix = "Test"
    return prefix + class_filename

In [5]:
def tdd_check(commit_source=None):
    """Report whether test files were created before, with, or after their classes.

    Walks the commits in order and looks only at files whose change type is
    "ADD" or "RENAME". Each such file is classified with `is_test_file` /
    `is_class_file` and matched to its counterpart by file name, using
    `get_class_file_name`, `get_test_file_name_v1` and
    `get_test_file_name_v2`. The summary is printed; nothing is returned.

    Args:
        commit_source: iterable of commit objects exposing `merge` and
            `modified_files` (whose items expose `filename` and
            `change_type.name`). When omitted, the notebook-level `commits`
            iterable is used, which keeps plain `tdd_check()` calls working.
            If a one-shot iterator is passed (or used by default) and it has
            already been consumed, the report will contain only zeros.
    """
    if commit_source is None:
        commit_source = commits

    # Only membership is ever tested on these, so sets keep each lookup
    # constant-time instead of scanning an ever-growing list.
    visited_test_files = set()
    visited_class_files = set()
    before_set = []
    same_set = []
    before = 0
    non_tdd = 0
    same = 0
    commit_count = 0
    merge_commit_count = 0
    test_file_count = 0
    class_file_count = 0
    mismatch_count = 0
    sizes_of_tdd_commit = []
    sizes_of_same_commit = []

    for commit in commit_source:
        commit_count += 1
        if commit.merge:
            merge_commit_count += 1

        # Read the file list once per commit; it is needed both for the
        # names and for the commit-size statistics.
        modified_files = commit.modified_files
        changed_files = [
            f.filename
            for f in modified_files
            if f.change_type.name in ("ADD", "RENAME")
        ]
        changed_names = set(changed_files)

        for file in changed_files:
            if is_test_file(file) and file not in visited_test_files:
                test_file_count += 1
                class_name = get_class_file_name(file)
                if class_name in visited_class_files:
                    # The class appeared in an earlier commit: test came later.
                    non_tdd += 1
                elif class_name in changed_names:
                    print("If the 'same' is a merge commit:", commit.merge)
                    same += 1
                    same_set.append(file)
                    sizes_of_same_commit.append(len(modified_files))
                else:
                    mismatch_count += 1
            if is_class_file(file) and file not in visited_class_files:
                class_file_count += 1
                if (
                    get_test_file_name_v1(file) in visited_test_files
                    or get_test_file_name_v2(file) in visited_test_files
                ):
                    print("If the 'TDD' is a merge commit:", commit.merge)
                    before += 1
                    before_set.append(file)
                    sizes_of_tdd_commit.append(len(modified_files))
                else:
                    # NOTE(review): a class added together with its test, or
                    # before its test, also lands here because the visited
                    # sets are only updated after this loop. The mismatch
                    # counter therefore overlaps with `same` and `non_tdd`.
                    mismatch_count += 1

        # Mark files as visited only after the whole commit was examined, so
        # files of the same commit are never treated as "seen earlier".
        for file in changed_files:
            if is_class_file(file):
                visited_class_files.add(file)
            if is_test_file(file):
                visited_test_files.add(file)

    print("Total Commits:", commit_count)
    print("Merge commits:", merge_commit_count)
    print("Test files:", test_file_count)
    print("Class files:", class_file_count)
    print("# cases that test file is created before the class file:", before)
    print("# cases that test file and class file are created in the same commit:", same)
    print("# cases that test file is created after the class file (Non-TDD):", non_tdd)
    print("# cases that a test file cannot find its class file and a class file cannot find its test file:", mismatch_count)
    print("The class files using TDD:", before_set)
    print("The class files that are created with their test files in the same commit:", same_set)
    print("Sizes of tdd commits:", sizes_of_tdd_commit)
    print("Sizes of same commits:", sizes_of_same_commit)


In [6]:
# Run the analysis; it iterates over the notebook-level `commits` and prints
# per-match debug lines followed by the summary counters.
tdd_check()

If the 'same' is a merge commit: False
If the 'same' is a merge commit: False
If the 'same' is a merge commit: False
If the 'same' is a merge commit: False
If the 'same' is a merge commit: False
If the 'same' is a merge commit: False
If the 'same' is a merge commit: False
If the 'same' is a merge commit: False
If the 'same' is a merge commit: False
If the 'same' is a merge commit: False
If the 'same' is a merge commit: False
If the 'same' is a merge commit: False
If the 'same' is a merge commit: False
If the 'same' is a merge commit: False
If the 'same' is a merge commit: False
If the 'same' is a merge commit: False
If the 'TDD' is a merge commit: False
If the 'TDD' is a merge commit: False
If the 'same' is a merge commit: False
If the 'same' is a merge commit: False
If the 'same' is a merge commit: False
If the 'same' is a merge commit: False
If the 'same' is a merge commit: False
If the 'same' is a merge commit: False
If the 'same' is a merge commit: False
If the 'same' is a merge co