In [78]:
import os
import subprocess
import sys
import time
import traceback
from tqdm import tqdm
import re

In [6]:
BASE_DIR = os.getcwd() # os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

In [8]:
def run_command(command: str, cwd=None, timeout=None):
    result = subprocess.run(
        [command],
        timeout=timeout,
        capture_output=True,
        text=True,
        shell=True,
        cwd=cwd,
    )
    return result.stdout

In [9]:
REPO_DIR = 'repos/facebook_react'

# change to the repo directory
os.chdir(REPO_DIR)

In [32]:
run_command('git checkout main')

"Your branch is up to date with 'origin/main'.\n"

In [111]:
# tmp2 = sorted(list(set(re.split(r'[\n\t]+', run_command('git log --pretty=format: --name-status | cut -f2- | sort -u')))))
# 'packages/react-devtools-shared/src/backend/renderer.js' in tmp2, 'src/backend/renderer.js' in tmp2

In [69]:
all_files_string = run_command('git log --pretty=format: --name-status | cut -f2- | sort -u')

# use re.split to split by newlines and tabs
# split the string into a list of files
# all_files = all_files_string.split('\n')
all_files = re.split(r'[\n\t]+', all_files_string)

# ensure the list is unique
all_files = sorted(list(set(all_files)))

In [102]:
'docs/docs/10.6-update.md' in all_files

True

In [96]:
all_files

['',
 '.babelrc',
 '.circleci/config.yml',
 '.circleci/config.yml\textension/.circleci/config.yml',
 '.codesandbox/ci.json',
 '.editorconfig',
 '.eslintignore',
 '.eslintignore\textension/.eslintignore',
 '.eslintrc',
 '.eslintrc\textension/.eslintrc',
 '.eslintrc.js',
 '.flowconfig',
 '.flowconfig\textension/.flowconfig',
 '.flowconfig\tscripts/flow/config/flowconfig',
 '.flowcoverage',
 '.gitattributes',
 '.github/ISSUE_TEMPLATE.md',
 '.github/ISSUE_TEMPLATE/bug_report.md',
 '.github/ISSUE_TEMPLATE/config.yml',
 '.github/ISSUE_TEMPLATE/devtools_bug_report.yml',
 '.github/ISSUE_TEMPLATE/documentation.md',
 '.github/ISSUE_TEMPLATE/question.md',
 '.github/ISSUE_TEMPLATE/react_18.md',
 '.github/PULL_REQUEST_TEMPLATE.md',
 '.github/stale.yml',
 '.github/workflows/commit_artifacts.yml',
 '.github/workflows/devtools_check_repro.yml',
 '.gitignore',
 '.gitignore\textension/.gitignore',
 '.jshintrc',
 '.mailmap',
 '.netlify',
 '.nvmrc',
 '.prettierignore',
 '.prettierignore\textension/.pretti

In [75]:
# sanity check
# 'packages/react-devtools-shared/src/backend/renderer.js' in all_files, 'src/backend/renderer.js' in all_files

In [55]:
def get_all_possible_file_paths(file_path):
    out = run_command(f'git log --follow --name-only --pretty=format:"%H %f" -- {file_path}').split('\n')

    # combinations of 3
    # 6396b664118442f3c2eae7bf13732fcb27bda98f Model-Float-on-Hoistables-semantics-26106
    # packages/react-devtools-shared/src/backend/renderer.js
    # (empty line)

    out = out[1::3] # just get the file paths
    return set(out)

In [58]:
get_all_possible_file_paths('packages/react-reconciler/src/ReactFiberReconciler.js')

{'packages/react-reconciler/src/ReactFiberReconciler.js',
 'packages/react-reconciler/src/ReactFiberReconciler.old.js',
 'src/renderers/shared/fiber/ReactFiberReconciler.js'}

In [79]:
def assign_FIDs(file_paths):
    graph = {}
    FID_to_paths = {}
    path_to_FID = {}
    visited = set()

    # Build the graph
    for file_path in tqdm(file_paths, desc='Building graph', total=len(file_paths)):
        connected_paths = get_all_possible_file_paths(file_path)
        if file_path not in graph:
            graph[file_path] = set()
        for connected_path in connected_paths:
            if connected_path not in graph:
                graph[connected_path] = set()
            graph[file_path].add(connected_path)
            graph[connected_path].add(file_path)

    def dfs(path, current_FID):
        visited.add(path)
        path_to_FID[path] = current_FID
        if current_FID not in FID_to_paths:
            FID_to_paths[current_FID] = []
        FID_to_paths[current_FID].append(path)
        for neighbor in graph[path]:
            if neighbor not in visited:
                dfs(neighbor, current_FID)

    # Assign FIDs
    current_FID = 0
    for file_path in tqdm(file_paths, desc='Assigning FIDs', total=len(file_paths)):
        if file_path not in visited:
            dfs(file_path, current_FID)
            current_FID += 1

    return FID_to_paths, path_to_FID

In [110]:
FID_to_paths, path_to_FID = assign_FIDs(all_files)

# save the FID_to_paths and path_to_FID as 2 json files named fb_FID_to_paths.json and fb_path_to_FID.json
import json
with open(os.path.join(BASE_DIR, 'facebook_react_FID_to_paths.json'), 'w') as f:
    json.dump(FID_to_paths, f)

with open(os.path.join(BASE_DIR, 'facebook_react_path_to_FID.json'), 'w') as f:
    json.dump(path_to_FID, f)

Building graph: 100%|██████████| 9712/9712 [41:38<00:00,  3.89it/s]  
Assigning FIDs: 100%|██████████| 9712/9712 [00:00<00:00, 583196.09it/s]
