In [94]:
import json

with open("tags.json", 'r') as f:
    data = [
        json.loads(x) for x in f.readlines()
    ]

In [95]:
# list unique files

unique_files = set()
for item in data:
    unique_files.add(item['rel_fname'])

unique_files = ["root/"+x for x in list(unique_files)]
unique_files

['root/codegraph/wehr.py',
 'root/codegraph/awgr/eagers.py',
 'root/tests/test_utils.py',
 'root/codegraph/utils.py',
 'root/codegraph/parser.py',
 'root/tests/test_data/vizualyzer.py',
 'root/codegraph/core.py',
 'root/codegraph/vizualyzer.py',
 'root/codegraph/main.py',
 'root/tests/test_codegraph.py']

In [None]:
nodes_csv = {"root": 0}
edges_csv = set()

def extract_relationships(file_paths):
    """
    Extracts unique relationships from a list of file paths.

    Args:
    - file_paths (list of str): A list of file paths.

    Returns:
    - list of str: A list of relationships in the format "folder contains folder" or "folder contains file".
    """
    relationships = set()

    for path in file_paths:
        # Split the path into components
        parts = path.split('/')

        # Generate relationships
        for i in range(1, len(parts)):
            start = 1 if i > 1 else 0
            # Construct the current folder path
            current_folder = '/'.join(parts[start:i])
            # Construct the next part (either a folder or a file)
            next_part = '/'.join(parts[1:i+1])
            
            for folder in [current_folder, next_part]:
                if folder not in nodes_csv:
                    nodes_csv[folder] = len(nodes_csv)

            relation = "folder" if i < len(parts) - 1 else "file"
            relationships.add(f"{current_folder} contains {relation} {next_part}")
            edges_csv.add((
                nodes_csv[current_folder], f"contains {relation}", nodes_csv[next_part]
            ))

    return list(relationships)


relationships = extract_relationships(unique_files)
_=[print(x) for x in sorted(list(relationships))]

codegraph contains file codegraph/core.py
codegraph contains file codegraph/main.py
codegraph contains file codegraph/parser.py
codegraph contains file codegraph/utils.py
codegraph contains file codegraph/vizualyzer.py
codegraph contains file codegraph/wehr.py
codegraph contains folder codegraph/awgr
codegraph/awgr contains file codegraph/awgr/eagers.py
root contains folder codegraph
root contains folder tests
tests contains file tests/test_codegraph.py
tests contains file tests/test_utils.py
tests contains folder tests/test_data
tests/test_data contains file tests/test_data/vizualyzer.py


In [100]:
# All def links
defs = list()
defs_dict = dict()

for item in data:
    if item['kind'] == "ref": continue
    """f
    This thing also provides those relations
    file contains function
    file contains class
    class contains method
    """
    defs.append((
        item['fname'], item['name'], item['info']
    ))
    defs_dict[
        (item['fname'], item['name'].split()[-1])
    ] = len(defs_dict)

    loc = (item['rel_fname'], item['name'])
    if loc not in nodes_csv:
        nodes_csv[loc] = len(nodes_csv)

    print(defs[-1][:-1], len(defs_dict)-1)

    if item['category'] == "function" and '.' not in item['name']:
        edges_csv.add((
            nodes_csv[item['rel_fname']], "file contains function", nodes_csv[loc]
        ))
    
    elif item['category'] == "class":
        edges_csv.add((
            nodes_csv[item['rel_fname']], "file contains class", nodes_csv[loc]
        ))
        for method in item['info'].split('\n'):
            method_loc = (item['rel_fname'], method)
            if method_loc not in nodes_csv:
                nodes_csv[method_loc] = len(nodes_csv)
            edges_csv.add((
                nodes_csv[loc], "class contains method", nodes_csv[method_loc]
            ))

('test_input/codegraph-main/codegraph/awgr/eagers.py', 'cfa') 0
('test_input/codegraph-main/codegraph/core.py', 'read_file_content') 1
('test_input/codegraph-main/codegraph/core.py', 'parse_code_file') 2
('test_input/codegraph-main/codegraph/core.py', 'get_code_objects') 3
('test_input/codegraph-main/codegraph/core.py', 'CodeGraph') 4
('test_input/codegraph-main/codegraph/core.py', 'CodeGraph.__init__') 5
('test_input/codegraph-main/codegraph/core.py', 'CodeGraph.get_lines_numbers') 6
('test_input/codegraph-main/codegraph/core.py', 'CodeGraph.usage_graph') 7
('test_input/codegraph-main/codegraph/core.py', 'CodeGraph.get_dependencies') 8
('test_input/codegraph-main/codegraph/core.py', 'get_module_name') 9
('test_input/codegraph-main/codegraph/core.py', 'module_name_in_imports') 10
('test_input/codegraph-main/codegraph/core.py', 'get_imports_and_entities_lines') 11
('test_input/codegraph-main/codegraph/core.py', 'search_entities_from_list_in_code') 12
('test_input/codegraph-main/codegrap

In [102]:
edges_csv

{(0, 'contains folder', 1),
 (0, 'contains folder', 5),
 (1, 'contains file', 2),
 (1, 'contains file', 7),
 (1, 'contains file', 8),
 (1, 'contains file', 11),
 (1, 'contains file', 12),
 (1, 'contains file', 13),
 (1, 'contains folder', 3),
 (2, 'file contains function', 57),
 (3, 'contains file', 4),
 (4, 'file contains function', 15),
 (5, 'contains file', 6),
 (5, 'contains file', 14),
 (5, 'contains folder', 9),
 (6, 'file contains function', 61),
 (6, 'file contains function', 62),
 (7, 'file contains function', 53),
 (8, 'file contains class', 34),
 (8, 'file contains class', 39),
 (8, 'file contains class', 42),
 (8, 'file contains class', 44),
 (8, 'file contains class', 46),
 (8, 'file contains function', 50),
 (8, 'file contains function', 51),
 (8, 'file contains function', 52),
 (9, 'contains file', 10),
 (10, 'file contains function', 59),
 (10, 'file contains function', 60),
 (11, 'file contains class', 19),
 (11, 'file contains function', 16),
 (11, 'file contains func

In [25]:
import os
import ast

def parse_imports(file_path):
    with open(file_path, 'r') as file:
        tree = ast.parse(file.read())

    imports = {}
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            for alias in node.names:
                imports[alias.asname or alias.name] = alias.name
        elif isinstance(node, ast.ImportFrom):
            module = node.module
            for alias in node.names:
                imports[alias.asname or alias.name] = f"{module}.{alias.name}"

    return imports

def find_imported_function_location(file_path, function_name):
    # Parse the imports in the file
    imports = parse_imports(file_path)

    # Get the base directory of the file
    base_dir = os.path.dirname(file_path)

    # Check if the function is in the imports dictionary
    if function_name in imports:
        import_path = imports[function_name]
        print(file_path, base_dir)
        print(import_path)
        # Resolve the import path to a file path
        parts = (parts := import_path.split('.'))[len(parts)-2:-1]
        resolved_path = os.path.join(base_dir, *parts) + '.py'
        return resolved_path
    else:
        return ""

def get_local_path(file_path, function_name):
    for item in data:
        if item['kind'] == 'def' and item['fname'] == file_path and item['name'].split('.')[-1] == function_name:
            return file_path
    
    return ""
    
def resolve_reference(file_path, function_name):
    if location := get_local_path(file_path, function_name) or find_imported_function_location(file_path, function_name):
        # return location
        return defs_dict.get(
            (location, function_name), -1
        )
    return -1


for item in data:
    if item['kind'] == "def": continue
    if item['rel_fname'] != "tests/test_utils.py" or item['name'] != "get_python_paths_list": continue
    # print(item['rel_fname'], item['name'])
    ref = resolve_reference(item['fname'], item['name'])
    # print(item['rel_fname'], item['name'], ref)

    break

test_input/codegraph-main/tests/test_utils.py test_input/codegraph-main/tests
codegraph.utils.get_python_paths_list


In [126]:
import ast
import os
import builtins

import ast
import os
import builtins

def resolve_reference(name, file_path, project_root):
    """
    Resolves the reference of a function, class, or method to its definition.

    Args:
    - name (str): The name of the function, class, or method to resolve.
    - file_path (str): The relative file path where the name is referenced, starting from the project root.
    - project_root (str): The root directory of the project.

    Returns:
    - str: The file path of the definition, or an empty string if it's a built-in or external module.
    """

    # Parse the file to analyze imports and definitions
    with open(os.path.join(project_root, file_path), 'r') as file:
        tree = ast.parse(file.read())

    # Track imports
    imports = {}

    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            for alias in node.names:
                imports[alias.asname or alias.name] = alias.name
        elif isinstance(node, ast.ImportFrom):
            module = node.module
            for alias in node.names:
                imports[alias.asname or alias.name] = f"{module}.{alias.name}"

    # Check if the name is imported
    if name in imports:
        import_path = imports[name]
        if '.' in import_path:
            # Handle nested imports
            module_parts = import_path.split('.')
            module_file = os.path.join(project_root, *module_parts[:-1], f"{module_parts[-2]}.py")
            if os.path.exists(module_file):
                return module_file
        else:
            # Handle top-level imports
            module_file = os.path.join(project_root, f"{import_path}.py")
            if os.path.exists(module_file):
                return module_file

    # Search the project directory for the definition
    for root, dirs, files in os.walk(project_root):
        for file in files:
            if file.endswith('.py'):
                module_file_path = os.path.join(root, file)
                with open(module_file_path, 'r') as f:
                    module_tree = ast.parse(f.read())
                    for node in ast.walk(module_tree):
                        if isinstance(node, ast.FunctionDef) or isinstance(node, ast.ClassDef):
                            if node.name == name:
                                return module_file_path

    # If the name is not found, assume it's an external module
    return "No location detected"

project_root = 'test_input/codegraph-main'

current_ref = None
for i, item in enumerate(data):
    if item['kind'] == "def":
        current_ref = item.copy()
        continue

    name = item['name']
    file_path = item['rel_fname']

    definition_location = resolve_reference(name, file_path, project_root)
    definition_location = definition_location[len(project_root)+1:]

    location_id = nodes_csv.get((definition_location, name), -1)
    
    if location_id == -1: continue

    edges_csv.add((
        nodes_csv[(current_ref['rel_fname'], current_ref['name'])],
        "references", location_id
    ))


In [128]:
len(edges_csv)

95

In [130]:
nodes_csv

{'root': 0,
 'codegraph': 1,
 'codegraph/wehr.py': 2,
 'codegraph/awgr': 3,
 'codegraph/awgr/eagers.py': 4,
 'tests': 5,
 'tests/test_utils.py': 6,
 'codegraph/utils.py': 7,
 'codegraph/parser.py': 8,
 'tests/test_data': 9,
 'tests/test_data/vizualyzer.py': 10,
 'codegraph/core.py': 11,
 'codegraph/vizualyzer.py': 12,
 'codegraph/main.py': 13,
 'tests/test_codegraph.py': 14,
 ('codegraph/awgr/eagers.py', 'cfa'): 15,
 ('codegraph/core.py', 'read_file_content'): 16,
 ('codegraph/core.py', 'parse_code_file'): 17,
 ('codegraph/core.py', 'get_code_objects'): 18,
 ('codegraph/core.py', 'CodeGraph'): 19,
 ('codegraph/core.py', 'CodeGraph.__init__'): 20,
 ('codegraph/core.py', 'CodeGraph.get_lines_numbers'): 21,
 ('codegraph/core.py', 'CodeGraph.usage_graph'): 22,
 ('codegraph/core.py', 'CodeGraph.get_dependencies'): 23,
 ('codegraph/core.py', 'get_module_name'): 24,
 ('codegraph/core.py', 'module_name_in_imports'): 25,
 ('codegraph/core.py', 'get_imports_and_entities_lines'): 26,
 ('codegraph

In [142]:
for item in data:
    loc = (item['rel_fname'], item['name'])
    node_id = nodes_csv.get(loc, -1)
    if node_id == -1 or item['kind'] == "ref": continue
    del nodes_csv[loc]
    nodes_csv[node_id] = f"# location: {item['rel_fname']}\n{item['info']}"

nodes_csv_items = nodes_csv.items()
for k, v in nodes_csv_items:
    if type(v) == int:
        del nodes_csv[k]
        nodes_csv[v] = k

In [141]:
nodes_csv

{15: '# location: codegraph/awgr/eagers.py\ndef cfa():\n    return "awgga"',
 16: '# location: codegraph/core.py\ndef read_file_content(path: Text) -> Text:\n    with open(path, "r+") as file_read:\n        return file_read.read()',
 17: '# location: codegraph/core.py\ndef parse_code_file(path: Text) -> List:\n    """read module source and parse to get objects array"""\n    source = read_file_content(path)\n    parsed_module = create_objects_array(source=source, fname=os.path.basename(path))\n    return parsed_module',
 18: '# location: codegraph/core.py\ndef get_code_objects(paths_list: List) -> Dict:\n    """\n        get all code files data for paths list\n    :param paths_list: list with paths to code files to parse\n    :return:\n    """\n    all_data = {}\n    for path in paths_list:\n        content = parse_code_file(path)\n        all_data[path] = content\n    return all_data',
 19: '# location: codegraph/core.py\nCodeGraph.__init__\nCodeGraph.get_lines_numbers\nCodeGraph.usage

In [None]:
nodes = [(k, v) for k, v in nodes_csv.items()]
nodes = sorted(nodes, key=lambda l:l[0])

with open("test_input/output/nodes.csv", 'w') as f:
    f.write("id,name\n")
    for item in nodes:
        f.write(f"{item[0]},{item[1]}\n")

with open("test_input/output/edges.csv", 'w') as f:
    f.write("id_head,type,id_tail\n")
    for item in edges_csv:
        f.write(f"{item[0]},{item[1]},{item[2]}\n")

FileNotFoundError: [Errno 2] No such file or directory: 'output/nodes.csv'