In [3]:
import json

with open("tags.json", 'r') as f:
    data = [
        json.loads(x) for x in f.readlines()
    ]

In [29]:
# list unique files

unique_files = set()
for item in data:
    unique_files.add(item['rel_fname'])

unique_files = ["root/"+x for x in list(unique_files)]
unique_files

['root/codegraph/wehr.py',
 'root/codegraph/awgr/eagers.py',
 'root/tests/test_utils.py',
 'root/codegraph/utils.py',
 'root/codegraph/parser.py',
 'root/tests/test_data/vizualyzer.py',
 'root/codegraph/core.py',
 'root/codegraph/vizualyzer.py',
 'root/codegraph/main.py',
 'root/tests/test_codegraph.py']

In [58]:
def extract_relationships(file_paths):
    """
    Extracts unique relationships from a list of file paths.

    Args:
    - file_paths (list of str): A list of file paths.

    Returns:
    - list of str: A list of relationships in the format "folder contains folder" or "folder contains file".
    """
    relationships = set()

    for path in file_paths:
        # Split the path into components
        parts = path.split('/')

        # Generate relationships
        for i in range(1, len(parts)):
            start = 1 if i > 1 else 0
            # Construct the current folder path
            current_folder = '/'.join(parts[start:i])
            # Construct the next part (either a folder or a file)
            next_part = '/'.join(parts[1:i+1])

            # if i == 1:
            #     # Special case for the root folder
            #     relationships.add(f"root contains folder {parts[i]}")
            if i < len(parts) - 1:
                # Folder contains folder relationship
                relationships.add(f"{current_folder} contains folder {next_part}")
            else:
                # Folder contains file relationship
                relationships.add(f"{current_folder} contains file {next_part}")

    return list(relationships)


relationships = extract_relationships(unique_files)
_=[print(x) for x in relationships]

tests contains file tests/test_codegraph.py
root contains folder tests
codegraph contains file codegraph/vizualyzer.py
tests contains file tests/test_utils.py
tests contains folder tests/test_data
codegraph/awgr contains file codegraph/awgr/eagers.py
codegraph contains file codegraph/parser.py
tests/test_data contains file tests/test_data/vizualyzer.py
root contains folder codegraph
codegraph contains file codegraph/wehr.py
codegraph contains file codegraph/utils.py
codegraph contains file codegraph/core.py
codegraph contains file codegraph/main.py
codegraph contains folder codegraph/awgr


In [22]:
nodes_csv = {"root": 0}


In [24]:
# All def links
defs = list()
defs_dict = dict()
relations = list()

for item in data:
    if item['kind'] == "ref": continue
    """
    This thing also provides those relations
    file contains function
    file contains class
    class contains method
    """
    defs.append((
        item['fname'], item['name'], item['info']
    ))
    defs_dict[
        (item['fname'], item['name'].split()[-1])
    ] = len(defs_dict)

    print(defs[-1][:-1], len(defs_dict)-1)

    if item['category'] == "function" and '.' not in item['name']:
        relations.append(f"{item['rel_fname']} file contains function {item['name']}")
    
    elif item['category'] == "class":
        relations.append(f"{item['rel_fname']} file contains class {item['name']}")
        for method in item['info'].split('\n'):
            relations.append(f"{item['name']} class contains method {method}")

print("\n\n")
_=[print(x) for x in relations]

('test_input/codegraph-main/codegraph/awgr/eagers.py', 'cfa') 0
('test_input/codegraph-main/codegraph/core.py', 'read_file_content') 1
('test_input/codegraph-main/codegraph/core.py', 'parse_code_file') 2
('test_input/codegraph-main/codegraph/core.py', 'get_code_objects') 3
('test_input/codegraph-main/codegraph/core.py', 'CodeGraph') 4
('test_input/codegraph-main/codegraph/core.py', 'CodeGraph.__init__') 5
('test_input/codegraph-main/codegraph/core.py', 'CodeGraph.get_lines_numbers') 6
('test_input/codegraph-main/codegraph/core.py', 'CodeGraph.usage_graph') 7
('test_input/codegraph-main/codegraph/core.py', 'CodeGraph.get_dependencies') 8
('test_input/codegraph-main/codegraph/core.py', 'get_module_name') 9
('test_input/codegraph-main/codegraph/core.py', 'module_name_in_imports') 10
('test_input/codegraph-main/codegraph/core.py', 'get_imports_and_entities_lines') 11
('test_input/codegraph-main/codegraph/core.py', 'search_entities_from_list_in_code') 12
('test_input/codegraph-main/codegrap

In [25]:
import os
import ast

def parse_imports(file_path):
    with open(file_path, 'r') as file:
        tree = ast.parse(file.read())

    imports = {}
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            for alias in node.names:
                imports[alias.asname or alias.name] = alias.name
        elif isinstance(node, ast.ImportFrom):
            module = node.module
            for alias in node.names:
                imports[alias.asname or alias.name] = f"{module}.{alias.name}"

    return imports

def find_imported_function_location(file_path, function_name):
    # Parse the imports in the file
    imports = parse_imports(file_path)

    # Get the base directory of the file
    base_dir = os.path.dirname(file_path)

    # Check if the function is in the imports dictionary
    if function_name in imports:
        import_path = imports[function_name]
        print(file_path, base_dir)
        print(import_path)
        # Resolve the import path to a file path
        parts = (parts := import_path.split('.'))[len(parts)-2:-1]
        resolved_path = os.path.join(base_dir, *parts) + '.py'
        return resolved_path
    else:
        return ""

def get_local_path(file_path, function_name):
    for item in data:
        if item['kind'] == 'def' and item['fname'] == file_path and item['name'].split('.')[-1] == function_name:
            return file_path
    
    return ""
    
def resolve_reference(file_path, function_name):
    if location := get_local_path(file_path, function_name) or find_imported_function_location(file_path, function_name):
        # return location
        return defs_dict.get(
            (location, function_name), -1
        )
    return -1


for item in data:
    if item['kind'] == "def": continue
    if item['rel_fname'] != "tests/test_utils.py" or item['name'] != "get_python_paths_list": continue
    # print(item['rel_fname'], item['name'])
    ref = resolve_reference(item['fname'], item['name'])
    # print(item['rel_fname'], item['name'], ref)

    break

test_input/codegraph-main/tests/test_utils.py test_input/codegraph-main/tests
codegraph.utils.get_python_paths_list


In [26]:
import ast
import os
import builtins

import ast
import os
import builtins

def resolve_reference(name, file_path, project_root):
    """
    Resolves the reference of a function, class, or method to its definition.

    Args:
    - name (str): The name of the function, class, or method to resolve.
    - file_path (str): The relative file path where the name is referenced, starting from the project root.
    - project_root (str): The root directory of the project.

    Returns:
    - str: The file path of the definition, or an empty string if it's a built-in or external module.
    """

    # Parse the file to analyze imports and definitions
    with open(os.path.join(project_root, file_path), 'r') as file:
        tree = ast.parse(file.read())

    # Track imports
    imports = {}

    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            for alias in node.names:
                imports[alias.asname or alias.name] = alias.name
        elif isinstance(node, ast.ImportFrom):
            module = node.module
            for alias in node.names:
                imports[alias.asname or alias.name] = f"{module}.{alias.name}"

    # Check if the name is imported
    if name in imports:
        import_path = imports[name]
        if '.' in import_path:
            # Handle nested imports
            module_parts = import_path.split('.')
            module_file = os.path.join(project_root, *module_parts[:-1], f"{module_parts[-2]}.py")
            if os.path.exists(module_file):
                return module_file
        else:
            # Handle top-level imports
            module_file = os.path.join(project_root, f"{import_path}.py")
            if os.path.exists(module_file):
                return module_file

    # Search the project directory for the definition
    for root, dirs, files in os.walk(project_root):
        for file in files:
            if file.endswith('.py'):
                module_file_path = os.path.join(root, file)
                with open(module_file_path, 'r') as f:
                    module_tree = ast.parse(f.read())
                    for node in ast.walk(module_tree):
                        if isinstance(node, ast.FunctionDef) or isinstance(node, ast.ClassDef):
                            if node.name == name:
                                return module_file_path

    # If the name is not found, assume it's an external module
    return "No location detected"

project_root = 'test_input/codegraph-main'

for item in data:
    if item['kind'] == "def": continue

    name = item['name']
    file_path = item ['rel_fname']
    definition_location = resolve_reference(name, file_path, project_root)
    location_id = defs_dict.get((definition_location, name), -1)

    print(f"{file_path}\t{name}\t{location_id}")


codegraph/awgr/eagers.py	cfa	0
codegraph/core.py	open	-1
codegraph/core.py	read	-1
codegraph/core.py	read_file_content	1
codegraph/core.py	create_objects_array	37
codegraph/core.py	parse_code_file	2
codegraph/core.py	get_python_paths_list	38
codegraph/core.py	get_code_objects	3
codegraph/core.py	get_imports_and_entities_lines	11
codegraph/core.py	collect_entities_usage_in_modules	14
codegraph/core.py	defaultdict	-1
codegraph/core.py	defaultdict	-1
codegraph/core.py	append	-1
codegraph/core.py	append	-1
codegraph/core.py	populate_free_nodes	15
codegraph/core.py	set	-1
codegraph/core.py	usage_graph	-1
codegraph/core.py	deque	-1
codegraph/core.py	set	-1
codegraph/core.py	popleft	-1
codegraph/core.py	add	-1
codegraph/core.py	items	-1
codegraph/core.py	split	-1
codegraph/core.py	add	-1
codegraph/core.py	append	-1
codegraph/core.py	basename	-1
codegraph/core.py	defaultdict	-1
codegraph/core.py	defaultdict	-1
codegraph/core.py	keys	-1
codegraph/core.py	basename	-1
codegraph/core.py	get_module

In [27]:
defs_dict.keys()

dict_keys([('test_input/codegraph-main/codegraph/awgr/eagers.py', 'cfa'), ('test_input/codegraph-main/codegraph/core.py', 'read_file_content'), ('test_input/codegraph-main/codegraph/core.py', 'parse_code_file'), ('test_input/codegraph-main/codegraph/core.py', 'get_code_objects'), ('test_input/codegraph-main/codegraph/core.py', 'CodeGraph'), ('test_input/codegraph-main/codegraph/core.py', 'CodeGraph.__init__'), ('test_input/codegraph-main/codegraph/core.py', 'CodeGraph.get_lines_numbers'), ('test_input/codegraph-main/codegraph/core.py', 'CodeGraph.usage_graph'), ('test_input/codegraph-main/codegraph/core.py', 'CodeGraph.get_dependencies'), ('test_input/codegraph-main/codegraph/core.py', 'get_module_name'), ('test_input/codegraph-main/codegraph/core.py', 'module_name_in_imports'), ('test_input/codegraph-main/codegraph/core.py', 'get_imports_and_entities_lines'), ('test_input/codegraph-main/codegraph/core.py', 'search_entities_from_list_in_code'), ('test_input/codegraph-main/codegraph/cor