In [25]:
import os
import json
import yaml

# File Structure

In [3]:
class Node:
    """
    Represents a file or directory node.
    - name: The name of the file or folder.
    - type: Either "file" or "folder".
    - parent: Reference to the parent Node (None for root).
    - children: List of child Nodes (only applicable if type == "folder").
    """
    def __init__(self, name, node_type, parent=None):
        self.name = name
        self.type = node_type  # "folder" or "file"
        self.parent = parent
        self.children = []  # Only meaningful for folders

    def add_child(self, child_node):
        self.children.append(child_node)

    def tree_repr(self, prefix=""):
        """
        Recursively generate a string representation similar to the "tree ." command.
        """
        lines = []
        connector = "└── " if prefix.endswith("└── ") else "├── "
        lines.append(f"{prefix}{self.name}")
        if self.type == "folder":
            # Prepare new prefix for children
            child_prefix = prefix + ("    " if prefix.endswith("└── ") else "│   ")
            count = len(self.children)
            for idx, child in enumerate(self.children):
                # For the last child, adjust the prefix
                next_prefix = prefix + ("    " if idx == count - 1 else "│   ")
                lines.append(child.tree_repr(prefix=next_prefix))
        return "\n".join(lines)

    def __repr__(self):
        return f"Node(name='{self.name}', type='{self.type}')"


In [31]:
class FileTreeGraph:
    """
    Builds a file tree from a given directory using two representations:
    1. A dictionary-based representation: { "name": <str>, "files": [<str>], "folders": [<dict>] }
    2. An object-based representation using the Node class.
    """
    def __init__(self, root_path):
        self.root_path = os.path.abspath(root_path)
        self.dict_tree = None
        self.node_tree = None

    def build_dict_tree(self, current_path=None):
        """
        Recursively builds the dictionary representation of the file tree.
        """
        if current_path is None:
            current_path = self.root_path

        tree = {"name": os.path.basename(current_path) or current_path,
                "files": [],
                "folders": []}
        try:
            for entry in sorted(os.listdir(current_path)):
                full_path = os.path.join(current_path, entry)
                if os.path.isdir(full_path):
                    tree["folders"].append(self.build_dict_tree(full_path))
                else:
                    tree["files"].append(entry)
        except Exception as e:
            print(f"Error reading directory {current_path}: {e}")
        return tree

    def build_node_tree(self, current_path=None, parent_node=None):
        """
        Recursively builds the Node-based representation of the file tree.
        """
        if current_path is None:
            current_path = self.root_path

        node = Node(name=os.path.basename(current_path) or current_path, node_type="folder", parent=parent_node)
        try:
            for entry in sorted(os.listdir(current_path)):
                full_path = os.path.join(current_path, entry)
                if os.path.isdir(full_path):
                    child_node = self.build_node_tree(full_path, parent_node=node)
                    node.add_child(child_node)
                else:
                    file_node = Node(name=entry, node_type="file", parent=node)
                    node.add_child(file_node)
        except Exception as e:
            print(f"Error reading directory {current_path}: {e}")
        return node

    def build_graphs(self):
        """
        Build both the dictionary and node representations.
        """
        self.dict_tree = self.build_dict_tree()
        self.node_tree = self.build_node_tree()

    def dict_tree_repr(self, tree=None, indent=""):
        """
        Generate a string representation for the dictionary tree,
        similar to the output of the "tree ." command.
        """
        if tree is None:
            tree = self.dict_tree
        lines = []
        lines.append(f"{indent}{tree['name']}")
        # Files in the current folder
        for file in tree["files"]:
            lines.append(f"{indent}    {file}")
        # Recursively process folders
        for folder in tree["folders"]:
            lines.append(self.dict_tree_repr(folder, indent + "    "))
        return "\n".join(lines)

    def get_graphs(self):
        """
        Returns both representations.
        """
        if self.dict_tree is None or self.node_tree is None:
            self.build_graphs()
        return {"dict_tree": self.dict_tree, "node_tree_repr": self.node_tree.tree_repr()}
    
    def export_to_yaml(self, output_file):
        """
        Exports the dictionary tree representation to a YAML file.
        """
        # Ensure the dict tree is built
        if self.dict_tree is None:
            self.dict_tree = self.build_dict_tree()
        try:
            with open(output_file, 'w') as f:
                yaml.dump(self.dict_tree, f, default_flow_style=False)
            print(f"YAML exported to {output_file}")
        except Exception as e:
            print(f"Failed to export YAML: {e}")


In [27]:
path = "/home/david/Documents/glovo/machine-learning-platform/widget_framework"

In [28]:
ftg = FileTreeGraph(path)

In [29]:
ftg.export_to_yaml("tree.yaml")

YAML exported to tree.yaml


# File Dependecy

In [59]:
import ast
import os
from pathlib import Path
from typing import List, Dict, Any, Set

class PythonImportDAG:
    """
    Builds a nested DAG graph of Python file imports starting from a given entry Python file.
    The DAG includes dependencies between files traced via imports.
    """
    def __init__(self, root_path: str, entry_file: str):
        """
        Initialize the PythonImportDAG.
        
        :param root_path: The root directory of the project (like "machine-learning-platform").
        :param entry_file: The relative path of the entry Python file (relative to root_path).
        """
        self.root_path = Path(root_path).resolve()  # Absolute path of the project root
        self.entry_file = (self.root_path / entry_file).resolve()  # Absolute path of the entry file
        self.file_tree_graph = FileTreeGraph(root_path)  # Use FileTreeGraph for file path checks
        self.file_tree_graph.build_graphs()  # Build the file tree
        self.import_dag = {}  # The nested DAG graph

    def build_import_dag(self) -> Dict[str, Any]:
        """
        Main function to build the import DAG starting from the entry file.
        :return: The nested DAG graph as a dictionary.
        """
        self.import_dag = self._trace_file_imports(self.entry_file)
        return self.import_dag

    def _trace_file_imports(self, file_path: Path, recursion_stack=None) -> Dict[str, Any]:
        """
        Recursively trace the imports of a Python file and build a DAG.
        This version:
        - Uses a recursion stack (list) to detect circular imports;
        - Allows a file to appear multiple times if imported along different branches.
        """
        if recursion_stack is None:
            recursion_stack = []

        # Detect circular import by checking if the file is already in the recursion path
        if file_path in recursion_stack:
            return {}  # Return an empty dict to avoid infinite loops

        # Push current file onto the recursion stack
        recursion_stack.append(file_path)

        # Initialize the DAG for the current file
        dag = {
            "name": str(file_path.relative_to(self.root_path)),
            "imports": [],
            "imported_objects": []  # Add imported objects for the current file
        }


        # Parse imports in this file
        imports = self._get_imports_from_file(file_path)

        # Recursively process each import
        for import_path in imports:
            # If the import is a dictionary, extract module and objects
            if isinstance(import_path, dict):
                module = import_path["module"]
                objects = import_path["objects"]
                resolved_path = self._resolve_import_to_path(module)
            else:
                resolved_path = self._resolve_import_to_path(import_path)
                objects = []  # No specific objects if it's a simple module import

            if resolved_path:
                dag_entry = self._trace_file_imports(resolved_path, recursion_stack) if resolved_path else {}
                dag_entry["imported_objects"] = objects
                if dag_entry and all(child.get("name") != dag_entry.get("name") for child in dag["imports"]):
                    dag["imports"].append(dag_entry)

        # Pop from the stack after processing
        recursion_stack.pop()
        return dag


    def _get_imports_from_file(self, file_path: Path) -> List[str]:
        """
        Parse a Python file and extract all import statements.
        
        :param file_path: The absolute path of the Python file to parse.
        :return: A list of imported module/package names.
        """
        imports = []
        try:
            with open(file_path, "r") as file:
                tree = ast.parse(file.read(), filename=str(file_path))

            # Extract import statements
            for node in ast.walk(tree):
                if isinstance(node, ast.Import):
                    for alias in node.names:
                        imports.append(alias.name)
                elif isinstance(node, ast.ImportFrom):
                    if node.module:
                        imported_objects = [alias.name for alias in node.names]
                        imports.append({"module": node.module, "objects": imported_objects})

        except Exception as e:
            print(f"Failed to parse {file_path}: {e}")

        return imports

    def _resolve_import_to_path(self, import_path: str) -> Path:
        """
        Resolve an import path to a file in the project directory.
        
        :param import_path: The import path as a string (e.g., "product_recommender.api").
        :return: The absolute path of the imported file if found, None otherwise.
        """
        # Convert the import path to a file path (e.g., "product_recommender.api" -> "product_recommender/api.py")
        potential_path = self.root_path / import_path.replace(".", "/") / "__init__.py"  # Handle packages
        if potential_path.exists():
            return potential_path

        potential_path = self.root_path / f"{import_path.replace('.', '/')}.py"  # Handle modules
        if potential_path.exists():
            return potential_path

        # If not found, return None (likely a third-party library or built-in module)
        return None

    def visualize_import_dag(self, dag=None, indent=0) -> None:
        """
        Pretty print the import DAG, including imported objects.

        :param dag: The DAG to print (defaults to the main import_dag).
        :param indent: The current indentation level for pretty printing.
        """
        if dag is None:
            dag = self.import_dag

        # Print the current file name
        print("  " * indent + f"- {dag.get('name', '<unknown>')}")

        # Print the imported objects for this file, if any
        for obj in dag.get("imported_objects", []):
            print("  " * (indent + 1) + f"  * {obj}")

        # Recursively print child imports
        for child in dag.get("imports", []):
            if child:  # Skip empty dictionaries
                self.visualize_import_dag(child, indent + 1)



In [66]:
# Example: root folder "machine-learning-platform", entry file "product_recommender/api.py"
root = "/home/david/Documents/glovo/machine-learning-platform"
entry = "widget_framework/src/widget_builder.py"

# Build the import DAG
import_dag_builder = PythonImportDAG(root, entry)
dag = import_dag_builder.build_import_dag()

In [67]:
dag

{'name': 'widget_framework/src/widget_builder.py',
 'imports': [{'name': 'widget_framework/constants.py',
   'imports': [{'name': 'widget_framework/data/users_staff.py',
     'imports': [],
     'imported_objects': ['user_ids_staff']}],
   'imported_objects': ['USERS_STAFF']},
  {'name': 'widget_framework/src/experiment_string.py',
   'imports': [{'name': 'widget_framework/src/utils.py',
     'imports': [{'name': 'widget_framework/constants.py',
       'imports': [{'name': 'widget_framework/data/users_staff.py',
         'imports': [],
         'imported_objects': ['user_ids_staff']}],
       'imported_objects': ['FS_MAX_RETRIES', 'SLEEP_MS', 'TIMEOUT_IN_MS']}],
     'imported_objects': ['Utils']}],
   'imported_objects': ['ExperimentString']},
  {'name': 'widget_framework/src/user_segmentation/user_segment.py',
   'imports': [{'name': 'widget_framework/src/utils.py',
     'imports': [{'name': 'widget_framework/constants.py',
       'imports': [{'name': 'widget_framework/data/users_sta

In [68]:
# Print the DAG
import_dag_builder.visualize_import_dag(dag)

- widget_framework/src/widget_builder.py
  - widget_framework/constants.py
      * USERS_STAFF
    - widget_framework/data/users_staff.py
        * user_ids_staff
  - widget_framework/src/experiment_string.py
      * ExperimentString
    - widget_framework/src/utils.py
        * Utils
      - widget_framework/constants.py
          * FS_MAX_RETRIES
          * SLEEP_MS
          * TIMEOUT_IN_MS
        - widget_framework/data/users_staff.py
            * user_ids_staff
  - widget_framework/src/user_segmentation/user_segment.py
      * UserSegmentation
    - widget_framework/src/utils.py
        * Utils
      - widget_framework/constants.py
          * FS_MAX_RETRIES
          * SLEEP_MS
          * TIMEOUT_IN_MS
        - widget_framework/data/users_staff.py
            * user_ids_staff
  - widget_framework/src/utils.py
      * ContractInput
      * Utils
    - widget_framework/constants.py
        * FS_MAX_RETRIES
        * SLEEP_MS
        * TIMEOUT_IN_MS
      - widget_framework/dat

In [69]:
# Optionally, export to YAML for further use (e.g., visualization in React/D3.js)
import yaml
with open("import_dag.yaml", "w") as f:
    yaml.dump(dag, f)
    print("\nDAG exported to 'import_dag.yaml'")



DAG exported to 'import_dag.yaml'


# Execution Dependencies

In [70]:
import ast
import os
from pathlib import Path
from typing import Dict, List, Any, Tuple, Optional, Union

class PythonExecutionDependencyGraph:
    """
    Builds a fine-grained dependency graph of Python function/class/method usage.
    
    It traces from a specific 'entry point' (a function, class constructor, or class method)
    in a given file, analyzing only the code paths and symbols that are actually called.
    
    Major steps:
    1. Collect all top-level definitions (functions, classes, methods) in each file.
    2. Collect all imports in each file and map each imported symbol to its source file 
       (if it exists in the project).
    3. From the entry point, parse the function/method body. For each call, figure out which
       symbol is being invoked, whether local or imported, and continue recursively.
    4. Build a nested DAG that lists each node as (file_path, symbol_name) and its children 
       are the next calls in the chain.
    """

    def __init__(self, root_path: str):
        """
        :param root_path: The root directory of the project (absolute or relative).
        """
        self.root_path = Path(root_path).resolve()
        
        # Cache for definitions: { file_path: { "functions": {}, "classes": {} } }
        # Each function/class dictionary will map <symbol_name> -> <AST node or sub-info>
        self.definitions_cache: Dict[Path, Dict[str, Dict[str, Any]]] = {}
        
        # Cache for imports: { file_path: { "symbols": {imported_symbol: source_path}, "modules": {alias: module_path}, ... } }
        # This helps quickly map from an imported symbol to the actual local definition in another file.
        self.imports_cache: Dict[Path, Dict[str, Any]] = {}
        
        # A memo to avoid re-parsing the same file repeatedly
        self.parsed_files: Dict[Path, ast.Module] = {}

    def build_execution_dag(self,
                            entry_file: str,
                            entry_symbol: str) -> Dict[str, Any]:
        """
        Build the dependency graph starting from a specific symbol (function/method/constructor)
        in a specific file.
        
        :param entry_file: The relative path of the file where the entry symbol is defined, 
                           relative to root_path.
        :param entry_symbol: The symbol to trace (e.g., 'my_function', 'MyClass', 'MyClass.my_method').
        :return: Nested dictionary representing the execution DAG.
        """
        entry_path = (self.root_path / entry_file).resolve()
        
        # Parse and index the project so we can resolve references quickly
        self._index_file(entry_path)
        
        # Validate that the entry symbol is known in that file
        # entry_symbol can be something like 'MyClass', or 'MyClass.my_method', or just 'my_function'
        # We'll parse out the top-level piece and possibly a sub-method
        top_level_name, sub_method_name = self._split_symbol(entry_symbol)
        
        if top_level_name not in self.definitions_cache[entry_path]["functions"] \
           and top_level_name not in self.definitions_cache[entry_path]["classes"]:
            raise ValueError(f"Entry symbol '{top_level_name}' not found in {entry_file}")
        
        # Build a DAG node for the entry point
        # We'll store nodes in the form { "file": <path>, "symbol": <symbol>, "calls": [sub-nodes], ... }
        visited = set()  # track visited (file, symbol) to avoid cycles
        dag = self._trace_symbol_usage(entry_path, entry_symbol, visited)
        return dag

    def _trace_symbol_usage(self,
                            file_path: Path,
                            symbol: str,
                            visited: set) -> Dict[str, Any]:
        """
        Recursively build the usage subtree for a single symbol in a file.
        
        :param file_path: Absolute path to the file.
        :param symbol: The symbol to trace (like 'MyClass.my_method' or 'my_function').
        :param visited: A set of visited (file_path, symbol) pairs to prevent infinite loops.
        :return: A dictionary describing the node, e.g.:
                 {
                   "file": "src/moduleA.py",
                   "symbol": "MyClass.my_method",
                   "calls": [ <child_nodes> ]
                 }
        """
        node = {
            "file": str(file_path.relative_to(self.root_path)),
            "symbol": symbol,
            "calls": []
        }
        
        # Detect recursion
        sig = (file_path, symbol)
        if sig in visited:
            return node
        visited.add(sig)
        
        top_level_name, sub_method_name = self._split_symbol(symbol)

        # Figure out whether this is a function or a class
        definitions = self.definitions_cache[file_path]
        if top_level_name in definitions["functions"]:
            # We have a top-level function
            func_node = definitions["functions"][top_level_name]
            calls = self._get_function_calls(file_path, func_node)
            child_nodes = self._resolve_calls(file_path, calls, visited)
            node["calls"] = child_nodes
        
        elif top_level_name in definitions["classes"]:
            # We have a class
            class_info = definitions["classes"][top_level_name]
            
            if sub_method_name:
                # We are specifically tracing e.g. MyClass.my_method
                if sub_method_name not in class_info["methods"]:
                    raise ValueError(f"Method '{sub_method_name}' not found in class '{top_level_name}'")
                method_node = class_info["methods"][sub_method_name]
                calls = self._get_function_calls(file_path, method_node)
                child_nodes = self._resolve_calls(file_path, calls, visited)
                node["calls"] = child_nodes
            else:
                # No sub-method specified; treat as a constructor call or general class usage
                # In some code, "MyClass()" might trigger __init__, or you might just reference class attributes.
                # For simplicity, let's assume it references __init__ if present.
                if "__init__" in class_info["methods"]:
                    method_node = class_info["methods"]["__init__"]
                    calls = self._get_function_calls(file_path, method_node)
                    child_nodes = self._resolve_calls(file_path, calls, visited)
                    node["calls"] = child_nodes
        
        else:
            # Unknown symbol => possibly local variable or undefined import
            # We do nothing because it's not a recognized function/class
            pass
        
        return node

    def _split_symbol(self, symbol: str) -> Tuple[str, Optional[str]]:
        """
        Splits something like 'MyClass.my_method' into ('MyClass', 'my_method') 
        or just 'my_function' into ('my_function', None).
        """
        parts = symbol.split(".")
        if len(parts) == 1:
            return parts[0], None
        else:
            return parts[0], parts[1]

    def _resolve_calls(self,
                       file_path: Path,
                       calls: List[Tuple[str, str]],
                       visited: set) -> List[Dict[str, Any]]:
        """
        Given a list of calls from a function body, return the child DAG nodes
        corresponding to each call (only for local or in-project dependencies).
        
        Each element in 'calls' is (qualifier, func_name). 'qualifier' might be a local variable
        or an imported symbol; 'func_name' is the attribute or method being invoked. 
        If there's no explicit qualifier (i.e. calling a local function by name), it might be (None, 'local_func').
        
        :param file_path: The file in which these calls occur.
        :param calls: A list of calls discovered by `_get_function_calls`.
        :param visited: A set of visited (file, symbol) to avoid cycles.
        :return: A list of child nodes for the DAG.
        """
        children = []
        
        for (qualifier, func_name) in calls:
            if qualifier is None:
                # Means a local function or top-level reference
                # Check if it matches a local function name or a class name
                definitions = self.definitions_cache[file_path]
                if func_name in definitions["functions"]:
                    child_symbol = func_name
                    child_node = self._trace_symbol_usage(file_path, child_symbol, visited)
                    children.append(child_node)
                elif func_name in definitions["classes"]:
                    # Possibly calling a class like a constructor
                    child_symbol = func_name
                    child_node = self._trace_symbol_usage(file_path, child_symbol, visited)
                    children.append(child_node)
                else:
                    # Not found among local definitions => might be built-in or external
                    pass
            else:
                # We have something like imported_symbol.some_method or local_var.some_method
                # We need to see if 'qualifier' is a known import or a local definition
                # If it's an import, see if that import is in project scope, then continue
                # If it's local, we could do alias tracking, but that complicates things a lot
                # For now, assume that if 'qualifier' matches an imported symbol, we jump there
                import_info = self.imports_cache[file_path]
                
                # 'qualifier' might directly match an import from "symbols" or something
                if qualifier in import_info["symbols"]:
                    # Then the source is local to a different file
                    imported_file = import_info["symbols"][qualifier]  # Path to the source
                    # We must ensure that the source file is indexed
                    self._index_file(imported_file)
                    
                    # Now see if the imported file has a class/function named func_name
                    # or if the qualifier was itself a module import
                    definitions = self.definitions_cache[imported_file]
                    
                    # If we do 'from X import MyClass', qualifier=MyClass, then func_name=some_method
                    if func_name in definitions["classes"]:
                        child_symbol = f"{func_name}"
                        # sub-method usage => MyClass.my_method
                        # We'll store it as "MyClass.my_method"
                        # But we only do that if we can confirm a method usage
                        # For now, let's treat it as a method reference. This is approximate:
                        child_symbol = f"{func_name}.{func_name.lower()}"  # naive guess or we might parse the call further
                        # Instead, let's just do "MyClass" so we have a node, and then inside that node 
                        # we would see calls to sub-method. The more precise approach is to store 
                        # the sub-method name (like "MyClass.my_method") if we know it from the call. 
                        #
                        # For clarity, let's assume the function name is the sub-method:
                        # child_symbol = f"{func_name}.{func_name_called}"
                        
                        # But if this call is indeed MyClass() => user is calling constructor => child_symbol = 'MyClass'
                        # or MyClass.my_method => child_symbol = 'MyClass.my_method'
                        child_symbol = f"{func_name}.{func_name}"  # you would refine logic here
                        
                        child_node = self._trace_symbol_usage(imported_file, child_symbol, visited)
                        children.append(child_node)
                    
                    elif func_name in definitions["functions"]:
                        # If the symbol is a top-level function
                        child_symbol = func_name
                        child_node = self._trace_symbol_usage(imported_file, child_symbol, visited)
                        children.append(child_node)
                    else:
                        # Could be a variable import or something external
                        pass
                else:
                    # Possibly a local variable that references some object. 
                    # Full, correct handling would require dataflow analysis 
                    # to see what 'qualifier' is assigned to. That is more complex.
                    pass
        
        return children

    def _get_function_calls(self,
                            file_path: Path,
                            function_node: ast.AST) -> List[Tuple[Optional[str], str]]:
        """
        Inspect the AST of a function or method body, returning a list of calls in the form 
        (qualifier, func_name). For example, "foo.bar()" => (qualifier='foo', func_name='bar').
        A direct call "some_function()" => (None, 'some_function').
        
        This does not do complicated dataflow to track renames or assignments, but can handle
        simple patterns.
        """
        calls = []
        for node in ast.walk(function_node):
            if isinstance(node, ast.Call):
                # If it's a simple call like <Name>(...), e.g. my_func(...)
                if isinstance(node.func, ast.Name):
                    calls.append((None, node.func.id))
                # If it's an attribute reference like <Name>.<attr>(...), e.g. foo.bar(...)
                elif isinstance(node.func, ast.Attribute):
                    # node.func.value might be an ast.Name 
                    if isinstance(node.func.value, ast.Name):
                        qualifier = node.func.value.id  # e.g. 'foo'
                        method = node.func.attr  # e.g. 'bar'
                        calls.append((qualifier, method))
                    # If it's something more nested, we skip or handle further
        return calls

    def _index_file(self, file_path: Path) -> None:
        """
        Parse a file to:
          1) Cache its AST
          2) Identify all top-level functions, classes, and their methods
          3) Identify local imports and store them in self.imports_cache
        """
        if file_path in self.parsed_files:
            return  # Already done

        if not file_path.exists() or not file_path.is_file():
            # Possibly an external import
            return

        try:
            file_content = file_path.read_text(encoding="utf-8")
            tree = ast.parse(file_content, filename=str(file_path))
            self.parsed_files[file_path] = tree
        except Exception as e:
            print(f"Failed to parse {file_path}: {e}")
            return

        # Initialize caches for this file
        self.definitions_cache[file_path] = {
            "functions": {},   # name -> AST node
            "classes": {}      # name -> { "node": ClassDef, "methods": {method_name: AST node} }
        }
        self.imports_cache[file_path] = {
            "symbols": {},     # e.g., { "Utils": Path(...), "MyClass": Path(...) }
            "modules": {}      # e.g., { "sys": None, "requests": None } => external or not found
        }

        # Populate definitions
        for node in tree.body:
            if isinstance(node, ast.FunctionDef):
                # top-level function
                func_name = node.name
                self.definitions_cache[file_path]["functions"][func_name] = node
            elif isinstance(node, ast.ClassDef):
                # top-level class
                class_name = node.name
                methods = {}
                # gather methods
                for body_item in node.body:
                    if isinstance(body_item, ast.FunctionDef):
                        methods[body_item.name] = body_item
                self.definitions_cache[file_path]["classes"][class_name] = {
                    "node": node,
                    "methods": methods
                }

        # Populate imports (only keep references to local files in the project)
        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                for alias in node.names:
                    imported_name = alias.name  # e.g. 'os', 'my_package.module'
                    as_name = alias.asname if alias.asname else imported_name
                    # Try to resolve
                    imported_path = self._resolve_import_to_path(imported_name)
                    if imported_path:
                        # It's in-project
                        self.imports_cache[file_path]["symbols"][as_name] = imported_path
                    else:
                        # external or not found
                        self.imports_cache[file_path]["modules"][as_name] = None
            elif isinstance(node, ast.ImportFrom):
                if node.module:
                    module_name = node.module  # e.g. 'my_package.module'
                    imported_path = self._resolve_import_to_path(module_name)
                    if imported_path:
                        # It's in-project
                        for alias in node.names:
                            as_name = alias.asname if alias.asname else alias.name
                            self.imports_cache[file_path]["symbols"][as_name] = imported_path
                    else:
                        # external or not found
                        for alias in node.names:
                            as_name = alias.asname if alias.asname else alias.name
                            self.imports_cache[file_path]["modules"][as_name] = None

    def _resolve_import_to_path(self, import_path: str) -> Optional[Path]:
        """
        Attempt to convert an import path (e.g. "my_package.module") into an actual .py file 
        within the project. Return None if it's external or not found.
        """
        # Convert the import path to a file path
        # 1) Check if there's an __init__.py
        candidate = self.root_path / import_path.replace(".", "/") / "__init__.py"
        if candidate.exists():
            return candidate.resolve()

        # 2) Otherwise check module_name.py
        candidate = self.root_path / f"{import_path.replace('.', '/')}.py"
        if candidate.exists():
            return candidate.resolve()

        # Not found inside project
        return None

    def visualize_execution_dag(self, dag: Dict[str, Any], indent: int = 0):
        """
        Pretty print the execution DAG, showing (file, symbol) and nested calls.
        """
        prefix = "  " * indent
        print(f"{prefix}- {dag['file']} :: {dag['symbol']}")
        for child in dag.get("calls", []):
            self.visualize_execution_dag(child, indent + 1)


In [71]:
# Example: root folder "machine-learning-platform", entry file "product_recommender/api.py"
root = "/home/david/Documents/glovo/machine-learning-platform"
entry = "widget_framework/src/widget_builder.py"
entry_symbol = "WidgetBuilder.run"

In [72]:
dep_graph = PythonExecutionDependencyGraph(root)

In [73]:
dag = dep_graph.build_execution_dag(
    entry_file=entry,
    entry_symbol=entry_symbol
)

In [74]:
# 3) Visualize or otherwise use the resulting DAG
dep_graph.visualize_execution_dag(dag)

- widget_framework/src/widget_builder.py :: WidgetBuilder.run
