In [None]:
'''
To use the phraser we need build the tool first

git clone https://github.com/tree-sitter/tree-sitter-c.git

git clone https://github.com/tree-sitter/tree-sitter-cpp.git

git clone https://github.com/tree-sitter/tree-sitter-python.git


'''

In [24]:
import os
import json
from tree_sitter import Language, Parser

# Load the languages
Language.build_library(
  'build/experimental-phraser.so',
  ['tree-sitter-c', 'tree-sitter-cpp', 'tree-sitter-python']
)

C_LANGUAGE = Language('build/experimental-phraser.so', 'c')
CPP_LANGUAGE = Language('build/experimental-phraser.so', 'cpp')
PYTHON_LANGUAGE = Language('build/experimental-phraser.so', 'python')

def extract_info(file_path, language):
    with open(file_path, 'r', encoding='utf-8') as f:
        code = f.read()

    parser = Parser()
    parser.set_language(language)

    tree = parser.parse(bytes(code, "utf8"))

    # Get the root node of the syntax tree
    root_node = tree.root_node

    functions = []
    classes = []

    # Use tree traversal to get the function and class names
    for child in root_node.children:
        if child.type == 'function_definition':
            # The function name is the first child of the function definition
            functions.append(child['function_declarator']['identifier'])
        elif child.type in ['class_specifier', 'class_definition']:
            # The class name is the first child of the class specifier or definition
            classes.append(code[child.children[1].start_byte:child.children[1].end_byte])

    return {
        'file_path': file_path,
        'functions': functions,
        'classes': classes
    }


In [25]:
def record_code_elements(root_dir):
    data = []

    for dir_name, sub_dirs, files in os.walk(root_dir):
        for file in files:
            if file.endswith('.c'):
                info = extract_info(os.path.join(dir_name, file), C_LANGUAGE)
                if info:
                    data.append(info)
            elif file.endswith('.cpp'):
                info = extract_info(os.path.join(dir_name, file), CPP_LANGUAGE)
                if info:
                    data.append(info)
            elif file.endswith('.py'):
                info = extract_info(os.path.join(dir_name, file), PYTHON_LANGUAGE)
                if info:
                    data.append(info)

    with open('code_elements_tree_phraser.json', 'w') as f:
        json.dump(data, f, indent=4)


In [26]:
record_code_elements('../pytorch')

TypeError: 'tree_sitter.Node' object is not subscriptable