In [1]:
# Switch the process working directory before anything else runs.
# NOTE(review): absolute, machine-specific path; this cell fails wherever that
# directory does not exist.
import os
os.chdir("/home/david/Documents/projects/app-local-rag-repo/")

In [2]:
import ast
import os
from typing import Dict, List, Optional, Set
from pathlib import Path

# Node Definitions

In [3]:
from typing import List, Optional, Set

class Node:
    """Base element of the tree.

    Every node stores a name, a path string, a reference to its parent and
    the ordered list of its children.
    """

    def __init__(self, name: str, path: str, parent: Optional['Node'] = None):
        # Leading/trailing whitespace is removed from both identifiers.
        self.name = name.strip()
        self.path = path.strip()  # Full relative path (e.g., "folder1/folder2/script.py")
        self.parent = parent
        self.children: List[Node] = []

    def add_child(self, node: 'Node'):
        """Attach ``node`` below this node; a node that is already a child is left alone."""
        if node in self.children:
            return
        self.children.append(node)
        node.parent = self  # the child points back at the node that now owns it

    def __repr__(self):
        return "{}({}, {})".format(type(self).__name__, self.name, self.path)


class FolderNode(Node):
    """Folder node whose folder/script children are also exposed as attributes.

    After ``add_child`` a child can be reached as ``folder.<child name>`` (or
    ``getattr(folder, "<child name>")`` for names that are not identifiers,
    such as ``"script.py"``), in addition to the ``children`` list.
    """

    # Attributes that hold the node's own state. A child with one of these
    # names must not be mirrored as an attribute, otherwise it would replace
    # that state (e.g. a folder called "children" would overwrite the list).
    _RESERVED_ATTRS = frozenset({"name", "path", "parent", "children"})

    def __init__(self, name: str, path: str, parent: Optional['Node'] = None):
        super().__init__(name, path, parent)

    def __setattr__(self, key, value):
        # Node-valued attributes are written straight into the instance dict;
        # every other value goes through the default attribute machinery.
        if isinstance(value, Node):
            self.__dict__[key] = value
        else:
            super().__setattr__(key, value)

    def add_child(self, node: 'Node'):
        """Register ``node`` as a child and, when safe, as an attribute.

        Only ``FolderNode`` and ``ScriptNode`` children get an attribute. The
        attribute is skipped when the child's name collides with the node's
        own state or with anything defined on the class (methods, dunders);
        such a child stays reachable through ``children``.
        """
        super().add_child(node)
        if not isinstance(node, (FolderNode, ScriptNode)):
            return
        if node.name in self._RESERVED_ATTRS or hasattr(type(self), node.name):
            return
        # Dynamically add an attribute for easy access
        setattr(self, node.name, node)


class ScriptNode(Node):
    """Script node with containers for tracked dependencies and import aliases."""

    def __init__(self, name: str, path: str, parent: Optional['Node'] = None):
        super().__init__(name=name, path=path, parent=parent)
        # Dependency lists, one per kind of target node.
        self.script_dependencies: List['ScriptNode'] = list()
        self.class_dependencies: List['ClassNode'] = list()
        self.function_dependencies: List['FunctionNode'] = list()
        # alias -> fully qualified node path (e.g. "CA" -> "folder/script1.py::classA")
        self.aliases: Dict[str, str] = dict()

class FunctionNode(Node):
    """Node for a function; carries no state beyond what ``Node`` stores."""

    def __init__(self, name: str, path: str, parent: Optional['Node'] = None):
        super().__init__(name=name, path=path, parent=parent)

class ClassNode(Node):
    """Node for a class, with a set of alias names attached to it."""

    def __init__(self, name: str, path: str, parent: Optional['Node'] = None):
        super().__init__(name=name, path=path, parent=parent)
        # Aliases from imports (e.g., "ClassA" for "classAmpere")
        self.aliases: Set[str] = set()

class MethodNode(Node):
    """Node for a method, with a static flag and a list of dependency nodes."""

    def __init__(self, name: str, path: str, parent: Optional['Node'] = None, is_static: bool = False):
        super().__init__(name=name, path=path, parent=parent)
        self.is_static = is_static
        # Method call dependencies, kept free of duplicates by add_dep().
        self.dependencies: List[Node] = []

    def add_dep(self, node: 'Node'):
        """Record ``node`` as a dependency unless it is already recorded."""
        if node in self.dependencies:
            return
        self.dependencies.append(node)

# Folder and Script Tree Builder

In [4]:
class FolderScriptBuilder:
    """Builds folder and script nodes from the filesystem, with global_path as root."""

    def __init__(self, global_path: str):
        self.global_path = global_path

    def build(self) -> "tuple[Node, Dict[str, Node]]":
        """Walk ``global_path`` and build the folder/script tree.

        Returns:
            ``(root, nodes)`` where ``root`` is the ``FolderNode`` standing for
            ``global_path`` and ``nodes`` maps identifiers to nodes: the key
            ``"root"`` for the root itself, and "/"-separated paths relative
            to ``global_path`` for every folder and every ``*.py`` file.
        """
        root = FolderNode("root", ".", None)  # Root node represents global_path
        # Name the root after the last component of global_path. "" and "."
        # have no components, so fall back to the resolved directory name.
        base = Path(self.global_path)
        root.name = base.parts[-1] if base.parts else (base.resolve().name or "root")

        nodes: Dict[str, Node] = {"root": root}  # The root is registered under the key "root"
        # Folder lookup used while walking. It is kept apart from ``nodes`` so a
        # top-level directory literally called "root" is not mistaken for the
        # root node because of the "root" key above.
        folders: Dict[str, Node] = {}

        for root_dir, _, filenames in os.walk(self.global_path):
            rel_root = os.path.relpath(root_dir, self.global_path)
            parent = root
            # Split on the platform separator; registry keys are always "/"-joined.
            parts = () if rel_root == "." else Path(rel_root).parts

            for depth, part in enumerate(parts, start=1):
                folder_path = "/".join(parts[:depth])
                folder_node = folders.get(folder_path)
                if folder_node is None:
                    folder_node = FolderNode(part, folder_path)
                    folders[folder_path] = folder_node
                    # setdefault keeps the "root" key bound to the root node.
                    nodes.setdefault(folder_path, folder_node)
                    parent.add_child(folder_node)
                parent = folder_node

            for fname in filenames:
                if fname.endswith(".py"):
                    rel_path = "/".join((*parts, fname))
                    script_node = ScriptNode(fname, rel_path)
                    nodes[rel_path] = script_node
                    parent.add_child(script_node)

        return root, nodes

In [5]:
# Directory handed to FolderScriptBuilder as the tree root.
# NOTE(review): absolute, machine-specific path; the commented line is an
# alternative target kept by the author.
# global_path = "/home/david/Documents/glovo/machine-learning-platform/widget_framework"
global_path = "/home/david/Documents/projects/app-local-rag-repo/dummy-folder"

In [6]:
# Build the folder/script tree: `root` is the top FolderNode, `nodes` the
# identifier -> node registry returned alongside it.
fsb = FolderScriptBuilder(global_path)
root, nodes = fsb.build()

In [7]:
# Display the registry: key "root" for the root, relative paths for the rest.
nodes

{'root': FolderNode(dummy-folder, .),
 'script2.py': ScriptNode(script2.py, script2.py),
 'script1.py': ScriptNode(script1.py, script1.py),
 'folder2': FolderNode(folder2, folder2),
 'folder1': FolderNode(folder1, folder1),
 'folder1/folder1_1': FolderNode(folder1_1, folder1/folder1_1),
 'folder1/folder1_1/script1_1A.py': ScriptNode(script1_1A.py, folder1/folder1_1/script1_1A.py),
 'folder1/folder1_1/script1_1B.py': ScriptNode(script1_1B.py, folder1/folder1_1/script1_1B.py),
 'folder1/folder1_2': FolderNode(folder1_2, folder1/folder1_2)}

In [8]:
# Child folders are reachable as attributes of their parent FolderNode.
root.folder1.folder1_1.children

[ScriptNode(script1_1A.py, folder1/folder1_1/script1_1A.py),
 ScriptNode(script1_1B.py, folder1/folder1_1/script1_1B.py)]

# Script Analyzer (classes, functions and methods)

In [39]:
class ScriptAnalyzer:
    """Parses scripts and registers their top-level classes, functions and methods.

    On construction every ``ScriptNode`` in ``nodes`` whose file exists under
    ``query_folder`` is parsed and its AST stored in ``ast_cache`` (keyed like
    ``nodes``). ``analyze()`` then adds ``ClassNode``/``FunctionNode``/
    ``MethodNode`` entries to ``nodes`` (keys ``"<script path>::<name>"`` and
    ``"<script path>::<class>::<method>"``) and links them as children.

    Args:
        nodes: Registry of nodes; it is extended in place by ``analyze()``.
        global_path: Directory that the node paths are relative to.
        query_folder: Optional sub-folder of ``global_path``; only scripts
            located under it are parsed. Defaults to ``global_path`` itself.
    """

    # Plain and ``async`` definitions are both treated as functions/methods.
    _FUNCTION_DEFS = (ast.FunctionDef, ast.AsyncFunctionDef)

    def __init__(self, nodes: Dict[str, Node], global_path: str, query_folder: Optional[str] = None):
        self.nodes = nodes
        self.global_path = Path(global_path)
        self.query_folder = self.global_path / query_folder if query_folder else self.global_path
        self.ast_cache = {}
        self._build_ast()

    def _build_ast(self):
        """Parse every in-scope script once and cache the resulting AST."""
        for path, node in list(self.nodes.items()):
            if not isinstance(node, ScriptNode):
                continue
            full_path = self.global_path / node.path
            if full_path.exists() and full_path.is_relative_to(self.query_folder):
                # Parse from bytes so the source's declared encoding (UTF-8 by
                # default) is used instead of the platform locale, and pass the
                # filename so a SyntaxError names the offending file.
                self.ast_cache[path] = ast.parse(full_path.read_bytes(), filename=str(full_path))

    def analyze(self):
        """Create class/function/method nodes for every cached script."""
        # Snapshot the items: processing a script adds entries to self.nodes.
        script_nodes = list(self.nodes.items())
        for path, node in script_nodes:
            if isinstance(node, ScriptNode) and path in self.ast_cache:
                tree = self.ast_cache[path]
                self._process_script(node, tree)

    def _process_script(self, script_node: ScriptNode, tree: ast.Module):
        """Dispatch the module's top-level class and function definitions."""
        for ast_node in tree.body:
            if isinstance(ast_node, ast.ClassDef):
                self._process_class(script_node, ast_node)
            elif isinstance(ast_node, self._FUNCTION_DEFS):
                self._process_function(script_node, ast_node)

    def _process_class(self, script_node: ScriptNode, ast_node: ast.ClassDef):
        """Register a class and the methods defined directly in its body."""
        class_path = f"{script_node.path}::{ast_node.name}"
        class_node = ClassNode(ast_node.name, class_path, script_node)
        self.nodes[class_path] = class_node
        script_node.add_child(class_node)

        for method in ast_node.body:
            if isinstance(method, self._FUNCTION_DEFS):
                method_path = f"{class_path}::{method.name}"
                method_node = MethodNode(
                    method.name,
                    method_path,
                    class_node,
                    is_static=self._is_static_method(method),
                )
                self.nodes[method_path] = method_node
                class_node.add_child(method_node)

    def _process_function(self, script_node: ScriptNode, ast_node: ast.FunctionDef):
        """Register a top-level function of the script."""
        function_path = f"{script_node.path}::{ast_node.name}"
        function_node = FunctionNode(ast_node.name, function_path, script_node)
        self.nodes[function_path] = function_node
        script_node.add_child(function_node)

    @staticmethod
    def _is_static_method(method) -> bool:
        """Return True when the definition carries a bare ``@staticmethod`` decorator."""
        return any(
            isinstance(decorator, ast.Name) and decorator.id == "staticmethod"
            for decorator in method.decorator_list
        )

In [42]:
# Rebuild the tree for the dummy folder, then add class/function/method nodes.
# ScriptAnalyzer extends `nodes` in place, so `root` and `nodes` reflect the
# analysis once analyze() has run.
global_path = "/home/david/Documents/projects/app-local-rag-repo/dummy-folder"
fsb = FolderScriptBuilder(global_path)
root, nodes = fsb.build()
script_analyzer = ScriptAnalyzer(
    nodes=nodes, 
    global_path=global_path
)
script_analyzer.analyze()

In [43]:
# Display the root node (repr shows class name, name and path).
root

FolderNode(dummy-folder, .)

In [44]:
# Functions and classes found in the second script of folder1_1.
# NOTE(review): positional index follows os.walk order, which is not sorted.
root.folder1.folder1_1.children[1].children

[FunctionNode(indep_fun_11B, folder1/folder1_1/script1_1B.py::indep_fun_11B),
 ClassNode(Class11B, folder1/folder1_1/script1_1B.py::Class11B)]

In [45]:
# Grab the first two children of the first script in folder1_1.
# NOTE(review): relies on child order; presumably both are ClassNodes (confirm).
class11aa = root.folder1.folder1_1.children[0].children[0]
classa2 =   root.folder1.folder1_1.children[0].children[1]

In [46]:
# Class paths use the "<script path>::<class name>" form.
class11aa.path

'folder1/folder1_1/script1_1A.py::Class11AA'

In [47]:
# Children attached to the class node by ScriptAnalyzer.
class11aa.children

[MethodNode(__init__, folder1/folder1_1/script1_1A.py::Class11AA::__init__),
 MethodNode(method1, folder1/folder1_1/script1_1A.py::Class11AA::method1),
 MethodNode(method2, folder1/folder1_1/script1_1A.py::Class11AA::method2)]

In [48]:
# Path of the second node picked above.
classa2.path

'folder1/folder1_1/script1_1A.py::ClassA2'

# Import Analyzer

In [100]:
class ImportAnalyzer:
    """Turns the import statements of analysed scripts into node dependencies.

    For every top-level ``import`` / ``from ... import`` statement of a script
    the analyzer looks the target up in ``nodes`` and records it on the
    importing ``ScriptNode`` (``script_dependencies``, ``class_dependencies``,
    ``function_dependencies``) together with the local alias in ``aliases``.
    Imports that cannot be matched to a node are ignored.

    Args:
        global_path: Directory that the node paths are relative to.
        nodes: Registry of nodes, keyed by relative path and ``"path::name"``.
        ast_cache: Parsed modules keyed by script path.
        query_folder: Optional sub-folder of ``global_path``; when analysing
            all scripts, only those located under it are processed.
    """

    def __init__(self, global_path: str, nodes: Dict[str, Node], ast_cache: Dict[str, ast.Module], query_folder: Optional[str] = None):
        self.global_path = Path(global_path)
        self.nodes = nodes
        self.ast_cache = ast_cache
        self.query_folder = self.global_path / query_folder if query_folder else self.global_path

    def analyze(self, script_path: Optional[str] = None):
        """Process one script (``script_path`` given) or every cached script under ``query_folder``."""
        if script_path:
            script_node = self.nodes.get(script_path)
            if isinstance(script_node, ScriptNode) and script_path in self.ast_cache:
                tree = self.ast_cache[script_path]
                self._process_imports(script_node, tree)
        else:
            for path, node in self.nodes.items():
                full_path = self.global_path / path
                if isinstance(node, ScriptNode) and path in self.ast_cache and full_path.is_relative_to(self.query_folder):
                    tree = self.ast_cache[path]
                    self._process_imports(node, tree)

    def _process_imports(self, script_node: ScriptNode, tree: ast.Module):
        """Dispatch the module's top-level import statements."""
        for stmt in tree.body:
            if isinstance(stmt, ast.Import):
                self._handle_import(script_node, stmt)
            elif isinstance(stmt, ast.ImportFrom):
                self._handle_import_from(script_node, stmt)

    def _handle_import(self, script_node: ScriptNode, stmt: ast.Import):
        """
        Example:
            import script2
            import script2 as s2
            import script1, script2 as s2
        """
        for alias in stmt.names:
            module_name = alias.name
            as_name = alias.asname or module_name

            script_path = self._find_script_path(module_name, base_path=script_node.path)
            target_node = self.nodes.get(script_path) if script_path else None
            if not isinstance(target_node, ScriptNode):
                continue

            self._add_unique(script_node.script_dependencies, target_node)
            # Always record the alias: the same module may be imported again
            # under another name after the dependency is already registered.
            script_node.aliases[as_name] = script_path

    def _handle_import_from(self, script_node: ScriptNode, stmt: ast.ImportFrom):
        """
        Example:
            from script1 import classA, functionB as fB
            from . import something
            from .script2 import SomeClass

        Each imported name is resolved in this order:
            1. a class or function defined in the source module;
            2. a submodule of the source package (``from pkg import module``);
            3. otherwise the whole source module becomes a script dependency
               (no alias is recorded in that case).
        """
        module_name = stmt.module or ""
        level = stmt.level
        from_script_path = self._find_script_path(module_name, level, script_node.path)
        from_script_node = self.nodes.get(from_script_path) if from_script_path else None
        if not isinstance(from_script_node, ScriptNode):
            from_script_node = None

        for alias in stmt.names:
            imported_name = alias.name
            as_name = alias.asname or imported_name

            if from_script_node is not None:
                fq_path = f"{from_script_path}::{imported_name}"
                symbol = self.nodes.get(fq_path)
                if isinstance(symbol, ClassNode):
                    self._add_unique(script_node.class_dependencies, symbol)
                    script_node.aliases[as_name] = fq_path
                    continue
                if isinstance(symbol, FunctionNode):
                    self._add_unique(script_node.function_dependencies, symbol)
                    script_node.aliases[as_name] = fq_path
                    continue
                if symbol is not None:
                    # Registered under that key but neither class nor function:
                    # nothing to record.
                    continue

            # The name may be a module inside the source package.
            submodule = f"{module_name}.{imported_name}" if module_name else imported_name
            submodule_path = self._find_script_path(submodule, level, script_node.path)
            submodule_node = self.nodes.get(submodule_path) if submodule_path else None
            if isinstance(submodule_node, ScriptNode):
                self._add_unique(script_node.script_dependencies, submodule_node)
                script_node.aliases[as_name] = submodule_path
                continue

            # If no symbol match is found, treat as entire script import
            if from_script_node is not None:
                self._add_unique(script_node.script_dependencies, from_script_node)

    def _find_script_path(self, module_name: str, level: int = 0, base_path: str = "") -> Optional[str]:
        """Map a dotted module name to a key of ``nodes``.

        Args:
            module_name: Dotted module name; may be empty for ``from . import x``.
            level: Number of leading dots of a relative import (0 = absolute).
            base_path: Path of the importing script. A relative import is
                anchored at that script's directory, moving one directory up
                for every dot beyond the first.

        Returns:
            ``"<pkg>/<module>.py"`` when registered, else
            ``"<pkg>/<module>/__init__.py"`` when registered, else ``None``.
            Absolute names are looked up relative to the tree root.
        """
        if not module_name and level == 0:
            return None

        module_parts = [part for part in module_name.split(".") if part]

        if level > 0:
            # Directory components of the importing script.
            package_parts = list(Path(base_path).parts[:-1])
            climb = level - 1
            if climb > len(package_parts):
                return None  # the import points above the analysed tree
            if climb:
                package_parts = package_parts[:-climb]
            parts = package_parts + module_parts
        else:
            parts = module_parts

        candidates = []
        if parts:
            # Plain module file first (same key the absolute lookup always used).
            candidates.append("/".join(parts) + ".py")
        # Package: the directory's __init__.py stands for the module.
        candidates.append("/".join(parts + ["__init__.py"]))

        for candidate in candidates:
            if candidate in self.nodes:
                return candidate
        return None

    @staticmethod
    def _add_unique(bucket: list, node) -> None:
        """Append ``node`` to ``bucket`` unless it is already present."""
        if node not in bucket:
            bucket.append(node)

In [101]:
# End-to-end run: the tree is built for the whole repository, but parsing and
# import analysis are restricted to `query_folder`.
# NOTE(review): absolute, machine-specific paths.
# global_path = "/home/david/Documents/projects/app-local-rag-repo/dummy-folder"
global_path = "/home/david/Documents/glovo/machine-learning-platform/"
query_folder = "widget_framework"

# 0) Build the folder and script nodes
fsb = FolderScriptBuilder(global_path)
root, nodes = fsb.build()

# 1) Build Script/Class/Function/Method nodes
script_analyzer = ScriptAnalyzer(
    nodes=nodes, 
    global_path=global_path,
    query_folder=query_folder
)
script_analyzer.analyze()

# 2) Build the import relationships
# The analyzer reuses the ASTs cached by ScriptAnalyzer; passing a script path
# limits the analysis to that single script.
import_analyzer = ImportAnalyzer(
    global_path=global_path, 
    nodes=nodes, 
    ast_cache=script_analyzer.ast_cache,
    query_folder=query_folder
)
import_analyzer.analyze("widget_framework/api.py")

In [102]:
# Inspect everything recorded on the analysed script node.
vars(nodes["widget_framework/api.py"])

{'name': 'api.py',
 'path': 'widget_framework/api.py',
 'parent': FolderNode(widget_framework, widget_framework),
 'children': [FunctionNode(inference, widget_framework/api.py::inference),
  FunctionNode(production_app, widget_framework/api.py::production_app),
  FunctionNode(local_app, widget_framework/api.py::local_app)],
 'script_dependencies': [ScriptNode(constants.py, widget_framework/constants.py)],
 'class_dependencies': [ClassNode(ContractInput, widget_framework/src/utils.py::ContractInput),
  ClassNode(Utils, widget_framework/src/utils.py::Utils),
  ClassNode(WidgetBuilder, widget_framework/src/widget_builder.py::WidgetBuilder)],
 'function_dependencies': [],
 'aliases': {'ContractInput': 'widget_framework/src/utils.py::ContractInput',
  'Utils': 'widget_framework/src/utils.py::Utils',
  'WidgetBuilder': 'widget_framework/src/widget_builder.py::WidgetBuilder'}}

In [96]:
# NOTE(review): execution count 96 predates the analysis cell (101). Index 1
# needs at least two script dependencies, so this presumably comes from an
# earlier kernel state; confirm it still runs after Restart & Run All.
vars(nodes["widget_framework/api.py"])["script_dependencies"][1].path

'widget_framework/constants.py'

In [65]:
# NOTE(review): positional lookup; which child sits at index 4 follows the
# unsorted os.walk order. The unpacking requires exactly three children.
script_api = root.widget_framework.children[4]
fun_infe, fun_app, fun_local = script_api.children

In [66]:
# Inspect the script node picked by position above.
vars(script_api)

{'name': 'api.py',
 'path': 'widget_framework/api.py',
 'parent': FolderNode(widget_framework, widget_framework),
 'children': [FunctionNode(inference, widget_framework/api.py::inference),
  FunctionNode(production_app, widget_framework/api.py::production_app),
  FunctionNode(local_app, widget_framework/api.py::local_app)],
 'script_dependencies': [],
 'class_dependencies': [],
 'function_dependencies': [],
 'aliases': {}}

In [29]:
# "api.py" is not a valid identifier, so the child attribute is read via getattr.
# NOTE(review): only works when api.py sits directly under the tree root;
# presumably run with global_path pointing at the widget_framework folder.
node_api = getattr(root,"api.py")
node_api

ScriptNode(api.py, api.py)

In [30]:
# Inspect the script node fetched via getattr above.
vars(node_api)

{'name': 'api.py',
 'path': 'api.py',
 'parent': FolderNode(widget_framework, .),
 'children': [FunctionNode(inference, api.py::inference),
  FunctionNode(production_app, api.py::production_app),
  FunctionNode(local_app, api.py::local_app)],
 'script_dependencies': [],
 'class_dependencies': [],
 'function_dependencies': [],
 'aliases': {}}

In [21]:
# NOTE(review): `root.folder1` exists only for a tree built from the dummy
# folder; execution count 21 suggests this ran before global_path was switched.
script11a = root.folder1.folder1_1.children[0]
script11b = root.folder1.folder1_1.children[1]

In [22]:
# Inspect dependencies and aliases recorded on the first dummy script.
vars(script11a)

{'name': 'script1_1A.py',
 'path': 'folder1/folder1_1/script1_1A.py',
 'parent': FolderNode(folder1_1, folder1/folder1_1),
 'children': [ClassNode(Class11AA, folder1/folder1_1/script1_1A.py::Class11AA),
  ClassNode(ClassA2, folder1/folder1_1/script1_1A.py::ClassA2)],
 'script_dependencies': [ScriptNode(script2.py, script2.py)],
 'class_dependencies': [ClassNode(classA, script1.py::classA)],
 'function_dependencies': [FunctionNode(indep_fun_11B, folder1/folder1_1/script1_1B.py::indep_fun_11B)],
 'aliases': {'CA': 'script1.py::classA',
  'script2': 'script2.py',
  'indep_fun_11B': 'folder1/folder1_1/script1_1B.py::indep_fun_11B'}}

In [23]:
# Inspect dependencies and aliases recorded on the second dummy script.
vars(script11b)

{'name': 'script1_1B.py',
 'path': 'folder1/folder1_1/script1_1B.py',
 'parent': FolderNode(folder1_1, folder1/folder1_1),
 'children': [FunctionNode(indep_fun_11B, folder1/folder1_1/script1_1B.py::indep_fun_11B),
  ClassNode(Class11B, folder1/folder1_1/script1_1B.py::Class11B)],
 'script_dependencies': [ScriptNode(script1.py, script1.py)],
 'class_dependencies': [ClassNode(Class11AA, folder1/folder1_1/script1_1A.py::Class11AA)],
 'function_dependencies': [],
 'aliases': {'Class11AA': 'folder1/folder1_1/script1_1A.py::Class11AA',
  'classA2': 'script1.py'}}