In [1]:
from typing import List, Dict, Tuple, Optional

In [12]:
class BaseNode:
    """
    Common base for every node in the project graph.

    A node has a name, an ordered list of hierarchical children and an ordered
    list of outgoing dependency edges.
    """

    def __init__(self, name: str) -> None:
        self.name = name
        # Outgoing dependency edges, each stored as an (edge_type, target_node) pair.
        self.dependencies: List[Tuple[str, 'BaseNode']] = list()
        # Nodes nested directly under this one
        # (e.g., ScriptNode -> ClassNode -> MethodNode).
        self.children: List['BaseNode'] = list()

    def __repr__(self) -> str:
        # Use the concrete class name so subclasses identify themselves.
        kind = type(self).__name__
        return f"{kind}(name={self.name})"

    def add_dependency(self, target: 'BaseNode', dep_type: str) -> None:
        """Record an edge of kind ``dep_type`` from this node to ``target``."""
        edge = (dep_type, target)
        self.dependencies.append(edge)

    def add_child(self, node: 'BaseNode') -> None:
        """Append ``node`` to this node's children."""
        self.children.append(node)


class FolderNode(BaseNode):
    """
    A directory in the project tree.

    Child folders and scripts are indexed by name in ``folders`` / ``scripts`` and
    are also appended to the inherited ``children`` list.
    """

    def __init__(self, name: str, parent: Optional['FolderNode'] = None, full_path: Optional[str] = None):
        """
        :param name: Folder name.
        :param parent: Containing folder, or None.
        :param full_path: Path relative to the project root. When omitted (None) the
            folder name is used; an explicit value, including "", is stored as given.
        """
        super().__init__(name)
        self.parent = parent
        # Fall back to the name only when no path was supplied. A truthiness test
        # here would also replace an explicit "" (the root's own relative path)
        # with the folder name.
        self.full_path = name if full_path is None else full_path
        # Dictionaries to hold child folders and scripts, keyed by name.
        self.folders: Dict[str, 'FolderNode'] = {}
        self.scripts: Dict[str, 'ScriptNode'] = {}

    def add_folder(self, folder_node: 'FolderNode'):
        """Register ``folder_node`` under its name and add it to ``children``."""
        self.folders[folder_node.name] = folder_node
        self.add_child(folder_node)

    def add_script(self, script_node: 'ScriptNode'):
        """Register ``script_node`` under its name and add it to ``children``."""
        self.scripts[script_node.name] = script_node
        self.add_child(script_node)

class ScriptNode(BaseNode):
    """A script file in the project tree; holds the classes registered on it."""

    def __init__(self, name: str, full_path: str, parent_folder: Optional[FolderNode]):
        super().__init__(name)
        self.parent_folder = parent_folder
        # Full relative path to the script (e.g. "folder1/folder2/scriptA.py").
        self.full_path = full_path
        # Imported class alias/name -> full path of the script it originates from.
        self.imports: Dict[str, str] = dict()
        # Class name -> ClassNode for each class added through add_class().
        self.classes: Dict[str, 'ClassNode'] = dict()

    def add_class(self, class_node: 'ClassNode'):
        """Index ``class_node`` by its name and add it to ``children``."""
        key = class_node.name
        self.classes[key] = class_node
        self.add_child(class_node)


class ClassNode(BaseNode):
    """A class declared in a script; holds the methods registered on it."""

    def __init__(self, name: str, parent_script: ScriptNode):
        super().__init__(name)
        # Aliases under which the class might be imported (optional bookkeeping;
        # nothing in this class fills it in).
        self.aliases: List[str] = list()
        # Method name -> MethodNode for each method added through add_method().
        self.methods: Dict[str, 'MethodNode'] = dict()
        self.parent_script = parent_script

    def add_method(self, method_node: 'MethodNode'):
        """Index ``method_node`` by its name and add it to ``children``."""
        key = method_node.name
        self.methods[key] = method_node
        self.add_child(method_node)


class MethodNode(BaseNode):
    """A method of a class; the node type that dependency edges connect."""

    def __init__(self, name: str, parent_class: Optional[ClassNode] = None):
        super().__init__(name)
        # Descriptive properties; all start empty.
        self.summary: str = ""
        self.variables: List[str] = list()
        self.params: List[str] = list()
        # Owning class, or None when the method was created without one.
        self.parent_class = parent_class

In [13]:
class DependencyGraphBuilder:
    """
    Turns a hierarchical project mapping into a graph of folder, script, class and
    method nodes, then links method nodes with dependency edges.
    """

    def __init__(self, global_path: str, mapping: Dict):
        """
        :param global_path: Absolute path to the project root. It is stored on the
            builder; none of the methods of this class read it.
        :param mapping: Hierarchical dictionary representing the project.
            Script paths are "/"-delimited and relative to the project root. The
            values under "imports" must use that same form, because they are used
            as-is to look up the target script.
            Expected format (example):
            
            {
              "folders": {
                  "folder1": {
                      "folders": {
                          "folder2": {
                              "scripts": {
                                  "scriptA.py": { 
                                      "classes": [
                                          {
                                              "className": "ClassA",
                                              "methods": ["__init__", "method1", "method2"],
                                              "dependencies": {
                                                  "method1": [
                                                      {"dep_type": "method_call", "target": "ClassA2.method1"},
                                                      {"dep_type": "method_call", "target": "ClassB2.method3"}
                                                  ]
                                              }
                                          },
                                          {
                                              "className": "ClassA2",
                                              "methods": ["__init__", "method1", "method2", "method3"],
                                              "dependencies": {}
                                          }
                                      ],
                                      "imports": {"ClassB": "folder1/folder3/scriptB.py", "ClassB2": "folder1/folder3/scriptB.py"}
                                  }
                              }
                          },
                          "folder3": {
                              "scripts": {
                                  "scriptB.py": {
                                      "classes": [
                                          {
                                              "className": "ClassB",
                                              "methods": ["__init__", "method1", "method2"],
                                              "dependencies": {}
                                          },
                                          {
                                              "className": "ClassB2",
                                              "methods": ["__init__", "method1", "method2", "method3"],
                                              "dependencies": {}
                                          }
                                      ],
                                      "imports": {}
                                  }
                              }
                          }
                      },
                      "scripts": {
                          "script1Z.py": {
                              "classes": [
                                  {
                                      "className": "Script1ZClass",
                                      "methods": ["__init__", "method1"],
                                      "dependencies": {}
                                  }
                              ],
                              "imports": {}
                          }
                      }
                  }
              },
              "scripts": {
                  "scriptC.py": {
                      "classes": [
                          {
                              "className": "ScriptCClass",
                              "methods": ["__init__", "run"],
                              "dependencies": {}
                          }
                      ],
                      "imports": {}
                  }
              }
            }
        """
        self.global_path = global_path
        self.mapping = mapping  # The hierarchical project mapping.

        # The root folder node representing the project root.
        self.root_folder = FolderNode("project_root", full_path="")

        # Global lookup for script nodes: full relative path -> ScriptNode.
        self.script_lookup: Dict[str, 'ScriptNode'] = {}
        # Lookups for class and method nodes.
        self.class_lookup: Dict[Tuple[str, str], 'ClassNode'] = {}       # (script_full_path, class)
        self.method_lookup: Dict[Tuple[str, str, str], 'MethodNode'] = {}  # (script_full_path, class, method)

    def build_folder_tree(self, mapping: Dict, parent_folder: 'FolderNode', curr_path: str):
        """
        Create folder and script nodes for ``mapping`` beneath ``parent_folder``.

        :param mapping: Mapping level to process (its "scripts" and "folders" entries).
        :param parent_folder: Node that receives the scripts and sub-folders.
        :param curr_path: "/"-delimited path of ``parent_folder`` relative to the
            project root; "" for the root itself.
        """
        # Process scripts at this level, if any. A missing or None entry means "none".
        for script_name, script_info in (mapping.get("scripts") or {}).items():
            full_path = f"{curr_path}/{script_name}" if curr_path else script_name
            script_node = ScriptNode(script_name, full_path, parent_folder)
            # Copy the imports so that changes made on the node never write through
            # into the caller's mapping.
            script_node.imports = dict((script_info or {}).get("imports") or {})
            parent_folder.add_script(script_node)
            self.script_lookup[full_path] = script_node

        # Process child folders, depth first.
        for folder_name, folder_mapping in (mapping.get("folders") or {}).items():
            full_path = f"{curr_path}/{folder_name}" if curr_path else folder_name
            folder_node = FolderNode(folder_name, parent_folder, full_path)
            parent_folder.add_folder(folder_node)
            self.build_folder_tree(folder_mapping or {}, folder_node, full_path)

    def build_graph(self):
        """
        Build the whole graph from ``self.mapping``.

        Runs in three passes: folder/script tree, class and method nodes, then
        dependency edges. Edges are added last so that every possible target
        method already exists. Calling this again rebuilds the graph from scratch
        instead of appending a second copy of every node.
        """
        self._reset()
        self.build_folder_tree(self.mapping, self.root_folder, "")
        self._populate_classes()
        self._link_dependencies()

    def _reset(self):
        """Empty the root folder and all lookups before a (re)build."""
        # Cleared in place so that references callers already hold to the root
        # folder or to the lookup dicts stay valid.
        root = self.root_folder
        root.children.clear()
        root.folders.clear()
        root.scripts.clear()
        self.script_lookup.clear()
        self.class_lookup.clear()
        self.method_lookup.clear()

    def _populate_classes(self):
        """Create class and method nodes for every script in ``script_lookup``."""
        for script_full_path, script_node in self.script_lookup.items():
            script_info = self.get_script_mapping(script_full_path, self.mapping)
            if not script_info:
                continue
            for class_info in script_info.get("classes") or []:
                class_name = class_info["className"]
                class_node = ClassNode(class_name, script_node)
                script_node.add_class(class_node)
                self.class_lookup[(script_full_path, class_name)] = class_node
                for method_name in class_info.get("methods") or []:
                    method_node = MethodNode(method_name, class_node)
                    class_node.add_method(method_node)
                    self.method_lookup[(script_full_path, class_name, method_name)] = method_node

    def _link_dependencies(self):
        """Add a dependency edge for every declared dependency that can be resolved."""
        for script_full_path, script_node in self.script_lookup.items():
            script_info = self.get_script_mapping(script_full_path, self.mapping)
            if not script_info:
                continue
            for class_info in script_info.get("classes") or []:
                class_name = class_info["className"]
                method_deps = class_info.get("dependencies") or {}
                for method_name, deps in method_deps.items():
                    source_node = self.method_lookup.get((script_full_path, class_name, method_name))
                    if not source_node:
                        # Dependencies declared for a method that is not listed in "methods".
                        continue
                    for dep in deps:
                        dep_type = dep["dep_type"]
                        target_str = dep["target"]  # Format: "TargetClass.targetMethod"
                        if not isinstance(target_str, str) or target_str.count(".") != 1:
                            # Report the skipped entry instead of dropping it silently.
                            print(f"Warning: Malformed dependency target {target_str!r} for {script_full_path}:{class_name}.{method_name} (expected 'Class.method')")
                            continue
                        target_class, target_method = target_str.split(".")
                        # Resolve the target script.
                        target_script_full = script_full_path  # Default: same script.
                        # If target_class is not defined in the current script, use the imports mapping.
                        # NOTE(review): the import key is also used as the class name in the
                        # lookup below, so an import recorded under an alias that differs from
                        # the real class name will not resolve - confirm whether aliases occur.
                        if target_class not in script_node.classes:
                            imp = script_node.imports.get(target_class)
                            if imp:
                                target_script_full = imp  # Expected form: "folder1/folder3/scriptB.py".
                        target_node = self.method_lookup.get((target_script_full, target_class, target_method))
                        if target_node:
                            source_node.add_dependency(target_node, dep_type)
                        else:
                            print(f"Warning: Unable to resolve dependency {target_str} for {script_full_path}:{class_name}.{method_name}")

    def get_script_mapping(self, script_full_path: str, mapping: Dict) -> Optional[Dict]:
        """
        Walk the hierarchical mapping down to the script with the given full path.

        :param script_full_path: "/"-delimited path; every part but the last names a
            folder, the last part names the script.
        :param mapping: Mapping level to start from (normally the project mapping).
        :return: The script's entry, or None when a folder or the script is missing.
        """
        *folder_parts, script_name = script_full_path.split("/")
        current = mapping
        for part in folder_parts:
            # "or {}" treats an explicit None under "folders" like a missing key.
            current = (current.get("folders") or {}).get(part)
            if current is None:
                return None
        return (current.get("scripts") or {}).get(script_name)

    def get_entrypoint(self, script_full_path: str, class_name: str, method_name: str) -> Optional['MethodNode']:
        """Return the method node for (script, class, method), or None if it is unknown."""
        return self.method_lookup.get((script_full_path, class_name, method_name))


In [14]:
# Sample project mapping used to exercise DependencyGraphBuilder.
# Each level holds nested "folders" and "scripts" dicts; every script lists its
# "classes" (className, methods, per-method dependencies) and its "imports".
d_project_structure = {
    "folders": {
        "folder1": {
            "folders": {
                "folder2": {
                    "folders": {},
                    "scripts": {
                        "scriptA.py": {
                            "classes": [
                                {
                                    "className": "ClassA",
                                    "methods": ["__init__", "method1", "method2"],
                                    "dependencies": {
                                        "method1": [
                                            {"dep_type": "method_call", "target": "ClassA2.method1"},
                                            {"dep_type": "method_call", "target": "ClassB2.method3"}
                                        ]
                                    }
                                },
                                {
                                    "className": "ClassA2",
                                    "methods": ["__init__", "method1", "method2", "method3"],
                                    "dependencies": {}
                                }
                            ],
                            # Import targets are "/"-delimited script paths, the same form
                            # the builder uses as its script lookup keys.
                            "imports": {"ClassB": "folder1/folder3/scriptB.py", "ClassB2": "folder1/folder3/scriptB.py"}
                        }
                    }
                },
                "folder3": {
                    "folders": {},
                    "scripts": {
                        "scriptB.py": {
                            "classes": [
                                {
                                    "className": "ClassB",
                                    "methods": ["__init__", "method1", "method2"],
                                    "dependencies": {}
                                },
                                {
                                    "className": "ClassB2",
                                    "methods": ["__init__", "method1", "method2", "method3"],
                                    "dependencies": {}
                                }
                            ],
                            "imports": {}
                        }
                    }
                }
            },
            "scripts": {
                "script1Z.py": {
                    "classes": [
                        {
                            "className": "Script1ZClass",
                            "methods": ["__init__", "method1"],
                            "dependencies": {}
                        }
                    ],
                    "imports": {}
                }
            }
        }
    },
    "scripts": {
        "scriptC.py": {
            "classes": [
                {
                    "className": "ScriptCClass",
                    "methods": ["__init__", "run"],
                    "dependencies": {}
                }
            ],
            "imports": {}
        }
    }
}


In [15]:
# Create the builder with the project root and the hierarchical mapping.
# NOTE(review): the project root is a hard-coded absolute path. In the visible code
# DependencyGraphBuilder only stores it as global_path and never reads it back.
builder = DependencyGraphBuilder("/home/app-local-rag-repo/", d_project_structure)
# Build the folder/script tree, then the class/method nodes, then the dependency edges.
builder.build_graph()

In [16]:
# Show the mapping held by the builder (the object passed in as d_project_structure).
builder.mapping

{'folders': {'folder1': {'folders': {'folder2': {'folders': {},
     'scripts': {'scriptA.py': {'classes': [{'className': 'ClassA',
         'methods': ['__init__', 'method1', 'method2'],
         'dependencies': {'method1': [{'dep_type': 'method_call',
            'target': 'ClassA2.method1'},
           {'dep_type': 'method_call', 'target': 'ClassB2.method3'}]}},
        {'className': 'ClassA2',
         'methods': ['__init__', 'method1', 'method2', 'method3'],
         'dependencies': {}}],
       'imports': {'ClassB': 'folder1/folder3/scriptB.py',
        'ClassB2': 'folder1/folder3/scriptB.py'}}}},
    'folder3': {'folders': {},
     'scripts': {'scriptB.py': {'classes': [{'className': 'ClassB',
         'methods': ['__init__', 'method1', 'method2'],
         'dependencies': {}},
        {'className': 'ClassB2',
         'methods': ['__init__', 'method1', 'method2', 'method3'],
         'dependencies': {}}],
       'imports': {}}}}},
   'scripts': {'script1Z.py': {'classes': [{'cl

In [17]:
# Define the entrypoint: for example, ClassA.method1 in folder1/folder2/scriptA.py.
# get_entrypoint returns None when the (script, class, method) key is unknown.
entry_method = builder.get_entrypoint("folder1/folder2/scriptA.py", "ClassA", "method1")

In [19]:
# Report the entrypoint and each of its outgoing dependency edges.
if not entry_method:
    print("Entrypoint not found")
else:
    print(f"Entrypoint: {entry_method.parent_class.name}.{entry_method.name} in {entry_method.parent_class.parent_script.full_path}")
    # Every edge is stored as an (edge_type, target_node) pair.
    for dep_type, dep in entry_method.dependencies:
        print(f"  - {dep_type}: {dep.parent_class.name}.{dep.name} (from {dep.parent_class.parent_script.full_path})")

Entrypoint: ClassA.method1 in folder1/folder2/scriptA.py
  - method_call: ClassA2.method1 (from folder1/folder2/scriptA.py)
  - method_call: ClassB2.method3 (from folder1/folder3/scriptB.py)


In [9]:
# Script node that owns the entrypoint's class.
# NOTE(review): the output recorded below shows name=classA.py, which does not match
# the current mapping (the script is scriptA.py), and this cell's execution count (9)
# is lower than the cells above it. Re-run after a kernel restart to refresh it.
entry_method.parent_class.parent_script

ScriptNode(name=classA.py)

In [10]:
# Second child of the entrypoint's script (ClassA2 in the current mapping, since
# classes are added in list order), then that class's "method1" node.
entry_method.parent_class.parent_script.children[1].methods["method1"]

MethodNode(name=method1)