In [1]:
from typing import Dict, Literal, NamedTuple, Set
from collections import defaultdict

import pydriller
import pathlib
from pprint import pprint

In [2]:
# Directories holding the repositories to analyse. Both are relative paths;
# the traversal cell turns `test_modules_path / <repo>` into an absolute path,
# so they are resolved against the notebook's working directory.
test_modules_path: pathlib.Path = pathlib.Path("test_submodules")
# NOTE(review): not referenced by any cell visible in this part of the notebook.
production_modules_path: pathlib.Path = pathlib.Path("production_submodules")

In [3]:
# How a test file and its matching code file first show up in the history:
#   "tdd"     - the test file name was seen in an earlier commit than the code file
#   "same"    - both file names are first seen in the same commit
#   "non-tdd" - the code file name was seen in an earlier commit than the test file
EventTypes = Literal["tdd", "same", "non-tdd"]

class EventInstance(NamedTuple):
    """One classified pairing of a code file with its test file.

    The commit indices are positions in the commit traversal (0-based, as
    produced by ``enumerate``), not commit hashes.
    """
    code_commit_index: int  # traversal index at which the code file name was first seen
    test_commit_index: int  # traversal index at which the test file name was first seen
    code_name: str  # file name of the code file, e.g. "UrlUtils.java"
    test_name: str  # file name of the test file, e.g. "UrlUtilsTest.java"

# File name -> traversal index of the commit in which that name first appeared.
visited_file_names: Dict[str, int] = {}
# Number of pairings recorded per event type; missing keys read as 0.
counters: Dict[EventTypes, int] = defaultdict(int)
# The individual pairings behind each counter; missing keys read as an empty set.
counter_instances: Dict[EventTypes, Set[EventInstance]] = defaultdict(set)

In [4]:
def is_test_file(filename: str) -> bool:
    """Tell whether ``filename`` follows the ``<Name>Test.java`` convention.

    Args:
        filename: File name to inspect (the check is case-sensitive).

    Returns:
        ``True`` when the name ends with ``Test.java``, ``False`` otherwise.
    """
    test_suffix = "Test.java"
    return filename.endswith(test_suffix)

In [5]:
def get_code_file_name(filename: str) -> str:
    """Derive the code file name that a test file name refers to.

    Only a trailing ``Test.java`` is rewritten to ``.java``; the same text
    occurring earlier in the name is left alone (a plain ``str.replace``
    would rewrite every occurrence).

    Args:
        filename: Name of a test file, e.g. ``"UrlUtilsTest.java"``.

    Returns:
        The matching code file name, e.g. ``"UrlUtils.java"``. A name that
        does not end with ``Test.java`` is returned unchanged.
    """
    test_suffix = "Test.java"
    if not filename.endswith(test_suffix):
        return filename
    # Strip the suffix by length so only the end of the name is touched.
    return filename[: -len(test_suffix)] + ".java"

In [6]:
def get_test_file_name(filename: str) -> str:
    """Derive the test file name expected for a code file name.

    Only a trailing ``.java`` is rewritten to ``Test.java``. Names that merely
    contain ``.java`` somewhere else (``Foo.java.vm``, ``app.javascript``) are
    not Java sources and are returned unchanged instead of being mangled.

    Args:
        filename: Name of a code file, e.g. ``"UrlUtils.java"``.

    Returns:
        The matching test file name, e.g. ``"UrlUtilsTest.java"``. A name that
        does not end with ``.java`` is returned unchanged.
    """
    code_suffix = ".java"
    if not filename.endswith(code_suffix):
        return filename
    # Strip the suffix by length so only the end of the name is touched.
    return filename[: -len(code_suffix)] + "Test.java"

In [7]:
# Classify every code/test pair of the "dubboj" repository by the order in
# which the two file names first appear in the history.
#
# Files are identified by bare file name (no directory), so two files with the
# same name in different packages are treated as one file.

# Start from a clean slate: without this, re-running the cell finds every name
# already "visited", classifies nothing, and leaves the old totals in place.
visited_file_names.clear()
counters.clear()
counter_instances.clear()

for index, commit in enumerate(
    pydriller.Repository(str((test_modules_path / "dubboj").absolute())).traverse_commits()
):
    # Commits are walked from the first to the last. Only names that no earlier
    # commit has touched matter: a name is classified the first time it is seen.
    files_in_commit = {
        modified.filename
        for modified in commit.modified_files
        if modified.filename not in visited_file_names
    }

    for current_file_name in files_in_commit:
        if is_test_file(current_file_name):
            # A test seen for the first time (guaranteed by the filter above).
            code_file_name = get_code_file_name(current_file_name)

            if code_file_name in files_in_commit:
                # Code and test are introduced by the same commit.
                counters["same"] += 1
                counter_instances["same"].add(
                    EventInstance(
                        code_commit_index=index,
                        test_commit_index=index,
                        code_name=code_file_name,
                        test_name=current_file_name,
                    )
                )
            elif code_file_name in visited_file_names:
                # The code was introduced by an earlier commit than the test.
                counters["non-tdd"] += 1
                counter_instances["non-tdd"].add(
                    EventInstance(
                        code_commit_index=visited_file_names[code_file_name],
                        test_commit_index=index,
                        code_name=code_file_name,
                        test_name=current_file_name,
                    )
                )
            # Otherwise the code has not appeared yet; if a later commit
            # introduces it, the branch below records the pair as "tdd".
        else:
            # Anything that is not a test file is treated as a code file.
            test_file_name = get_test_file_name(current_file_name)

            if test_file_name in visited_file_names:
                # The test was introduced by an earlier commit than the code.
                counters["tdd"] += 1
                counter_instances["tdd"].add(
                    EventInstance(
                        code_commit_index=index,
                        test_commit_index=visited_file_names[test_file_name],
                        code_name=current_file_name,
                        test_name=test_file_name,
                    )
                )

    # Mark the names as seen only after the whole commit is classified, so the
    # "same commit" check above is not confused with "seen earlier".
    for file_name in files_in_commit:
        visited_file_names[file_name] = index
        

In [8]:
# Report the totals per event type first, then the individual pairings behind them.
for report in (counters, counter_instances):
    pprint(report)

defaultdict(<class 'int'>, {'same': 402, 'non-tdd': 457, 'tdd': 6})
defaultdict(<class 'set'>,
            {'non-tdd': {EventInstance(code_commit_index=1, test_commit_index=142, code_name='UrlUtils.java', test_name='UrlUtilsTest.java'),
                         EventInstance(code_commit_index=1, test_commit_index=160, code_name='ChangeTelnetHandler.java', test_name='ChangeTelnetHandlerTest.java'),
                         EventInstance(code_commit_index=1, test_commit_index=165, code_name='InjvmProtocol.java', test_name='InjvmProtocolTest.java'),
                         EventInstance(code_commit_index=1, test_commit_index=167, code_name='CurrentTelnetHandler.java', test_name='CurrentTelnetHandlerTest.java'),
                         EventInstance(code_commit_index=1, test_commit_index=167, code_name='ListTelnetHandler.java', test_name='ListTelnetHandlerTest.java'),
                         EventInstance(code_commit_index=1, test_commit_index=175, code_name='LogTelnetHandler.java', tes