In [13]:
from tree_sitter import Language, Parser
import tree_sitter_python, tree_sitter_javascript
import os
import json

# Wrap the handle exposed by each grammar package's `language()` call in a
# tree-sitter `Language` object.
PY_LANGUAGE = Language(tree_sitter_python.language())
JS_LANGUAGE = Language(tree_sitter_javascript.language())

# One parser per language, each constructed from its own Language object.
# NOTE(review): js_parser is not used by any of the cells visible here —
# confirm whether later cells need it.
py_parser = Parser(PY_LANGUAGE)
js_parser = Parser(JS_LANGUAGE)

In [69]:
def parse_python_file(file_path):
    """Parse a single Python file and return its syntax tree.

    Args:
        file_path: Path of the Python source file to read.

    Returns:
        A ``(tree, source_code)`` tuple, where ``tree`` is whatever
        ``py_parser.parse`` returns and ``source_code`` is the raw bytes read
        from the file. If reading or parsing raises, the error is printed and
        ``(None, None)`` is returned so the caller can skip the file.
    """
    try:
        # Read as bytes: the parser is fed the raw bytes, and the same bytes
        # are handed back to the caller alongside the tree.
        with open(file_path, 'rb') as file:
            source_code = file.read()
        # Use the module-level Python parser. The bare name `parser` is not
        # defined in the setup cell, and the resulting NameError would be
        # swallowed by the handler below, making every file "fail" silently.
        return py_parser.parse(source_code), source_code
    except Exception as e:
        # Best-effort: report the failure and let the caller carry on.
        print(f"Error parsing {file_path}: {e}")
        return None, None

def node_to_dict(node, source_code, order:int=0):
    """Recursively convert a syntax-tree node into a plain nested dict.

    Args:
        node: Object exposing ``type``, ``start_byte``, ``end_byte`` and
            ``children`` attributes.
        source_code: The bytes the node's byte offsets index into.
        order: Position of this node among its parent's children
            (0 for the root).

    Returns:
        A dict with the keys ``type``, ``start_byte``, ``end_byte``,
        ``order``, ``text`` (the decoded source slice covered by the node)
        and ``children`` (a list of dicts of the same shape, one per child).
    """
    # errors='replace' keeps a single undecodable byte from raising
    # UnicodeDecodeError and aborting the conversion of the whole tree;
    # such bytes appear as U+FFFD in the "text" field instead.
    text = source_code[node.start_byte:node.end_byte].decode('utf8', errors='replace')
    return {
        "type": node.type,
        "start_byte": node.start_byte,
        "end_byte": node.end_byte,
        "order": order,
        "text": text,
        "children": [node_to_dict(child, source_code, i) for i, child in enumerate(node.children)]
    }

def scan_root_directory(root_path):
    """List the immediate sub-directories and files of ``root_path``.

    Args:
        root_path: Directory to list (not recursive).

    Returns:
        A dict with ``"root"`` (the path as given), ``"folders"`` and
        ``"files"``; the last two are alphabetically sorted lists of entry
        names (not full paths). Entries that are neither a directory nor a
        regular file are left out.

    Raises:
        OSError: Propagated from ``os.listdir`` if ``root_path`` cannot be
            listed.
    """
    folders = []
    files = []

    # os.listdir returns entries in arbitrary order; sorting makes the
    # listing (and anything built from it) reproducible across runs.
    for entry in sorted(os.listdir(root_path)):
        full_path = os.path.join(root_path, entry)
        if os.path.isdir(full_path):
            folders.append(entry)
        elif os.path.isfile(full_path):
            files.append(entry)

    return {
        "root": root_path,
        "folders": folders,
        "files": files
    }

def process_path(path: str, code_base: dict):
    """Recursively mirror the directory at ``path`` into ``code_base``.

    ``code_base`` is a nested dict keyed by path component. Each call walks
    from the top of ``code_base`` down to the dict for ``path`` (creating
    levels as needed), records the directory's files there, then recurses
    into every sub-directory.

    Args:
        path: Directory to process; split on ``os.sep`` to obtain the keys.
        code_base: Dict that is filled in place.

    Returns:
        The same ``code_base`` object that was passed in.
    """
    listing = scan_root_directory(path)

    # Descend to the dict for this directory, creating missing levels.
    node = code_base
    for component in filter(None, path.split(os.sep)):
        node = node.setdefault(component, {})

    # Files: '.py' files store their syntax tree as a dict; everything else
    # stores a fixed placeholder string.
    for name in listing["files"]:
        if not name.endswith('.py'):
            node[name] = "file_content"
            continue
        tree, code = parse_python_file(os.path.join(path, name))
        # A falsy tree means parsing failed; the file is then left out.
        if tree:
            node[name] = node_to_dict(tree.root_node, code, 0)

    # Sub-directories: make sure the key exists, then recurse. The recursive
    # call receives the top-level dict and re-derives its level from the path.
    for sub in listing["folders"]:
        node.setdefault(sub, {})
        process_path(os.path.join(path, sub), code_base)

    return code_base

# Initialize and run: build the nested dict for the "sample" directory
# (a relative path, so it is resolved against the current working directory).
code_base = {}
# process_path fills code_base in place and returns it, so test_dict and
# code_base refer to the same dict object.
test_dict = process_path("sample", code_base)

In [79]:
# Serialize the collected tree to disk. json.dump writes straight to the file
# object and returns None, so its result is deliberately not bound to a name.
with open("test.json", "w", encoding="utf-8") as f:
    json.dump(test_dict, f, indent=2)