In [28]:
import os
import json
import yaml
import ast
from pathlib import Path

In [45]:
class ClassAnalyzer(ast.NodeVisitor):
    def __init__(self):
        self.init_mappings = []  # Mappings of attributes in __init__ (e.g., self.classA2 -> ClassA2)
        self.method_names = []  # All method definitions in the class

    def visit_ClassDef(self, node):
        # Process a class definition
        for body_item in node.body:
            if isinstance(body_item, ast.FunctionDef) and body_item.name == "__init__":
                # Visit __init__ method to extract mappings
                self.visit(body_item)
            elif isinstance(body_item, ast.FunctionDef):
                # Record all other method names
                self.method_names.append(body_item.name)

    def visit_FunctionDef(self, node):
        # Process the __init__ method to extract self.<attr> mappings
        if node.name == "__init__":
            for body_item in node.body:
                # Check for assignments like `self.<attr> = SomeClass()`
                if (
                    isinstance(body_item, ast.Assign)
                    and len(body_item.targets) == 1
                    and isinstance(body_item.targets[0], ast.Attribute)  # Check if target is an attribute
                    and isinstance(body_item.targets[0].value, ast.Name)
                    and body_item.targets[0].value.id == "self"  # Ensure it's self.<attr>
                    and isinstance(body_item.value, ast.Call)  # Ensure it's calling a class or function
                    and isinstance(body_item.value.func, ast.Name)  # Ensure the call is named (e.g., ClassA2())
                ):
                    # Extract the attribute name and class/function name
                    attr_name = body_item.targets[0].attr  # e.g., "classA2"
                    class_name = body_item.value.func.id  # e.g., "ClassA2"
                    self.init_mappings.append((attr_name, class_name))


def analyze_class(source_code, class_name):
    tree = ast.parse(source_code)
    analyzer = ClassAnalyzer()
    for node in tree.body:
        if isinstance(node, ast.ClassDef) and node.name == class_name:
            analyzer.visit(node)
            break
    return analyzer.init_mappings, analyzer.method_names

In [50]:
base_path = Path("/home/david/Documents/projects/app-local-rag-repo/")
script_name = "classA.py"
script_path = base_path / script_name
script_content = script_path.resolve().read_text()

In [51]:
init_calls, methods = analyze_class(script_content, "ClassA")

In [48]:
init_calls

[('classA2', 'ClassA2'), ('classB', 'ClassB')]

In [52]:
tree = ast.parse(script_content)

In [53]:
tree

<ast.Module at 0x721e100f0ee0>

In [55]:
def ast_to_dict(node):
    """
    Recursively convert an AST node into a dictionary representation.
    """
    if isinstance(node, ast.AST):
        node_dict = {
            "type": type(node).__name__,  # Type of the AST node (e.g., ClassDef, FunctionDef)
            "fields": {}  # Store all fields and their values
        }
        for field, value in ast.iter_fields(node):
            # Recursively convert fields to a dictionary
            node_dict["fields"][field] = ast_to_dict(value)
        return node_dict
    elif isinstance(node, list):
        # Convert a list of nodes
        return [ast_to_dict(item) for item in node]
    else:
        # Return the value directly (e.g., strings, numbers)
        return node

In [56]:
tree_dict = ast_to_dict(tree)

In [65]:
tree_dict["fields"]["body"][0]

{'type': 'ImportFrom',
 'fields': {'module': 'classB',
  'names': [{'type': 'alias', 'fields': {'name': 'ClassB', 'asname': None}},
   {'type': 'alias', 'fields': {'name': 'ClassB2', 'asname': None}}],
  'level': 0}}

In [64]:
tree_dict["fields"]["body"][1]

{'type': 'ClassDef',
 'fields': {'name': 'ClassA',
  'bases': [],
  'keywords': [],
  'body': [{'type': 'FunctionDef',
    'fields': {'name': '__init__',
     'args': {'type': 'arguments',
      'fields': {'posonlyargs': [],
       'args': [{'type': 'arg',
         'fields': {'arg': 'self', 'annotation': None, 'type_comment': None}}],
       'vararg': None,
       'kwonlyargs': [],
       'kw_defaults': [],
       'kwarg': None,
       'defaults': []}},
     'body': [{'type': 'Expr',
       'fields': {'value': {'type': 'Call',
         'fields': {'func': {'type': 'Name',
           'fields': {'id': 'print', 'ctx': {'type': 'Load', 'fields': {}}}},
          'args': [{'type': 'Constant',
            'fields': {'value': 'ClassA.__init__()', 'kind': None}}],
          'keywords': []}}}},
      {'type': 'Assign',
       'fields': {'targets': [{'type': 'Attribute',
          'fields': {'value': {'type': 'Name',
            'fields': {'id': 'self', 'ctx': {'type': 'Load', 'fields': {}}}},
  

# Go Modular

In [100]:
import os

def collect_py_files(root_dir):
    py_files = []
    for dirpath, _, filenames in os.walk(root_dir):
        for f in filenames:
            if f.endswith('.py'):
                full_path = os.path.join(dirpath, f)
                py_files.append(full_path)
    return py_files


In [103]:
import ast

def build_file_dependency_graph(main_file, all_py_files):
    """
    Returns a dict: file_path -> { 'ast': ..., 'imports': [...] }
    for all files that are imported, directly or indirectly, by main_file.
    """
    to_visit = [main_file]
    visited = set()
    file_graph = {}

    while to_visit:
        current = to_visit.pop()
        if current in visited:
            continue
        visited.add(current)

        # parse AST
        with open(current, 'r') as f:
            src = f.read()
        tree = ast.parse(src, filename=current)

        # identify imports in this file
        imported_files = extract_imports(tree, current, all_py_files)

        # store in file_graph
        file_graph[current] = {
            'ast': tree,
            'imports': imported_files
        }

        # add the newly discovered files to the to_visit
        for imp_file in imported_files:
            if imp_file not in visited:
                to_visit.append(imp_file)

    return file_graph

def extract_imports(tree, current_file, all_py_files):
    """
    Look for 'import X' or 'from X import Y' in the AST, 
    find the corresponding .py file if it exists in `all_py_files`.
    Return a list of paths to those .py files.
    """
    # This can be tricky because Python can import packages vs modules, etc.
    # We'll do a naive approach: just look for 'ImportFrom' (with .module).
    imported_files = []

    for node in tree.body:
        if isinstance(node, ast.ImportFrom) and node.module:
            # e.g. from classB import ClassB, ClassB2
            possible_name = node.module + ".py"
            # you'd need better resolution logic here
            for py_file in all_py_files:
                if py_file.endswith(possible_name):
                    imported_files.append(py_file)
        elif isinstance(node, ast.Import):
            # e.g. import classB
            # This is even more naive – we'd try `classB.py`
            for alias in node.names:
                possible_name = alias.name + ".py"
                for py_file in all_py_files:
                    if py_file.endswith(possible_name):
                        imported_files.append(py_file)
    return imported_files


In [101]:
base_path = "."
files_py = collect_py_files(base_path)

In [111]:
file_graph = build_file_dependency_graph("classA.py", files_py)

In [112]:
file_graph

{'classA.py': {'ast': <ast.Module at 0x721e083127a0>,
  'imports': ['./classB.py']},
 './classB.py': {'ast': <ast.Module at 0x721e08d2abc0>, 'imports': []}}

In [106]:
class ClassSkeleton:
    def __init__(self, file_path, class_name):
        self.file_path = file_path
        self.class_name = class_name
        self.methods = {}  # method_name -> (FunctionDef AST node)
        self.instantiated_attrs = {}  # e.g. { "classA2": "ClassA2", "classB": "ClassB" }

class FileSkeleton:
    def __init__(self, file_path):
        self.file_path = file_path
        self.classes = {}  # class_name -> ClassSkeleton
        # If you want: self.imports = {} or similar


In [107]:
def build_file_skeleton(file_path, tree):
    """
    Given a parsed AST for `file_path`, extract the high-level skeleton:
    classes, methods, attributes in __init__, etc.
    """
    fskel = FileSkeleton(file_path)

    for node in tree.body:
        if isinstance(node, ast.ClassDef):
            class_name = node.name
            cskel = ClassSkeleton(file_path, class_name)

            # Gather methods
            for child in node.body:
                if isinstance(child, ast.FunctionDef):
                    cskel.methods[child.name] = child

            # Look specifically at __init__ to find instantiations
            init_node = cskel.methods.get('__init__')
            if init_node:
                for stmt in init_node.body:
                    # e.g. self.x = SomeClass(...)
                    if (isinstance(stmt, ast.Assign) and stmt.targets and 
                        isinstance(stmt.targets[0], ast.Attribute)):
                        target_attr = stmt.targets[0]
                        if (isinstance(target_attr.value, ast.Name) and
                                target_attr.value.id == 'self'):
                            # e.g. self.classA2 = ClassA2()
                            attr_name = target_attr.attr
                            if isinstance(stmt.value, ast.Call):
                                call_func = stmt.value.func
                                if isinstance(call_func, ast.Name):
                                    # e.g. ClassA2(...)
                                    cskel.instantiated_attrs[attr_name] = call_func.id
                                elif (isinstance(call_func, ast.Attribute) 
                                      and isinstance(call_func.value, ast.Name)):
                                    # e.g. self.something.SomeClass(...) 
                                    # or module.ClassName(...) – handle as needed
                                    pass
            fskel.classes[class_name] = cskel

    return fskel


In [108]:
def build_global_class_index(file_graph):
    """
    file_graph: { file_path: { 'ast':..., 'imports':[...] } }
    Returns: global_index: { (filename, className) : ClassSkeleton }
    plus maybe a simpler index: { className : ClassSkeleton } if you assume no naming conflicts
    """
    global_index = {}
    for file_path, data in file_graph.items():
        tree = data['ast']
        fskel = build_file_skeleton(file_path, tree)
        for cls_name, cskel in fskel.classes.items():
            global_index[(file_path, cls_name)] = cskel

    return global_index


In [113]:
global_index = build_global_class_index(file_graph)

In [114]:
global_index

{('classA.py', 'ClassA'): <__main__.ClassSkeleton at 0x721e08d29a80>,
 ('classA.py', 'ClassA2'): <__main__.ClassSkeleton at 0x721e08d2b430>,
 ('./classB.py', 'ClassB'): <__main__.ClassSkeleton at 0x721e08d2a440>,
 ('./classB.py', 'ClassB2'): <__main__.ClassSkeleton at 0x721e08d2b3a0>}

In [123]:
def extract_calls_from_method(method_node: ast.FunctionDef, 
                              class_skel: ClassSkeleton, 
                              global_index):
    """
    Extracts calls from a method and skips duplicate constructor calls.
    """
    calls = []
    local_vars = {}  # Tracks local variable -> class type assignments
    visited_constructors = set()  # Tracks constructors already processed

    for stmt in method_node.body:
        # Handle assignments (e.g., `classA2 = ClassA2()`)
        if isinstance(stmt, ast.Assign) and len(stmt.targets) == 1:
            target = stmt.targets[0]
            if isinstance(target, ast.Name):
                # Check if the value is a constructor call
                if isinstance(stmt.value, ast.Call) and isinstance(stmt.value.func, ast.Name):
                    called_class = stmt.value.func.id
                    possible_skel = lookup_class_by_name(called_class, global_index)

                    if possible_skel and called_class not in visited_constructors:
                        # Mark this constructor as visited
                        visited_constructors.add(called_class)

                        # Add a constructor call
                        calls.append({
                            'type': 'constructor',
                            'class': (possible_skel.file_path, possible_skel.class_name),
                            'method': '__init__'
                        })

                        # Track local variable assignment
                        local_vars[target.id] = (possible_skel.file_path, possible_skel.class_name)

        # Handle calls (e.g., `classA2.method1()`)
        if isinstance(stmt, (ast.Assign, ast.Expr)):
            value = stmt.value if isinstance(stmt, ast.Assign) else stmt.value
            if isinstance(value, ast.Call):
                call_info = resolve_call(value, class_skel, local_vars, global_index)
                if call_info:
                    calls.append(call_info)

    return calls


def resolve_call(call_node: ast.Call, class_skel, local_vars, global_index):
    """
    Attempt to figure out if call_node is:
      - a method call on a local var (classA2.method1(...))
      - a method call on self.<attr> (self.classA2.method1(...))
      - a static call (ClassB2.method3(...))
      - a direct function name (print(...))
      - a constructor call (ClassA2(...)) – though we might've handled that in the assignment logic
    Return a small dict describing it or None if unknown.
    """

    func_ast = call_node.func

    # Case A: Attribute call => something.method(...)
    if isinstance(func_ast, ast.Attribute):
        # who is 'something'?
        owner_ast = func_ast.value
        method_name = func_ast.attr

        # A1: If `something` is a Name, e.g. classA2.method1(...)
        if isinstance(owner_ast, ast.Name):
            var_name = owner_ast.id
            # see if var_name is in local_vars (so we know the class)
            if var_name in local_vars:
                file_class = local_vars[var_name]  # (file_path, class_name)
                return {
                    'type': 'method',
                    'class': file_class,
                    'method': method_name
                }
            else:
                # It's possible that var_name is a global or something else
                # check if var_name is the name of a known class => then it's a static method
                possible_skel = lookup_class_by_name(var_name, global_index)
                if possible_skel:
                    return {
                        'type': 'method',
                        'class': (possible_skel.file_path, possible_skel.class_name),
                        'method': method_name
                    }
                else:
                    # We don't know
                    return {
                        'type': 'unknown',
                        'call': ast.dump(call_node)
                    }

        # A2: If `something` is an Attribute, e.g. self.classA2
        elif (isinstance(owner_ast, ast.Attribute) 
              and isinstance(owner_ast.value, ast.Name) 
              and owner_ast.value.id == 'self'):
            # e.g. self.classA2.method1(...)
            attr_name = owner_ast.attr  # classA2
            # see if we know from the skeleton's instantiated_attrs
            if attr_name in class_skel.instantiated_attrs:
                class_name = class_skel.instantiated_attrs[attr_name]
                possible_skel = lookup_class_by_name(class_name, global_index)
                if possible_skel:
                    return {
                        'type': 'method',
                        'class': (possible_skel.file_path, possible_skel.class_name),
                        'method': method_name
                    }
            # else, unknown
            return {
                'type': 'unknown',
                'call': ast.dump(call_node)
            }

    # Case B: Name call => e.g. print(...), or ClassA2(...)
    elif isinstance(func_ast, ast.Name):
        callee_id = func_ast.id

        # Exclude built-in functions
        if callee_id in dir(__builtins__):
            return None  # Skip adding this call to the chain

        # Check if it's a known class (constructor call) or a defined function
        possible_skel = lookup_class_by_name(callee_id, global_index)
        if possible_skel:
            return {
                'type': 'constructor',
                'class': (possible_skel.file_path, possible_skel.class_name),
                'method': '__init__'
            }
        else:
            # Assume it's a top-level function (if defined in the global index)
            return {
                'type': 'function',
                'name': callee_id
            }


    # otherwise, unknown
    return {
        'type': 'unknown',
        'call': ast.dump(call_node)
    }

def lookup_class_by_name(name, global_index):
    """
    If you assume class names are unique, you can do a simple map:
    className -> ClassSkeleton
    Otherwise you need (file, name) indexing or better logic.
    """
    for (file_path, class_name), cskel in global_index.items():
        if class_name == name:
            return cskel
    return None


In [124]:
class Node:
    def __init__(self, description):
        self.description = description
        self.next_node = None

def build_call_chain(file_path, class_name, method_name, global_index, visited=None):
    """
    Return the head Node of a singly-linked list of calls discovered
    from (file_path, class_name.method_name).
    visited: set of (file_path, class_name, method_name) to avoid infinite loops
    """
    if visited is None:
        visited = set()

    head = Node(f"{class_name}.{method_name}")
    current_node = head

    # 1) find the ClassSkeleton
    cskel = global_index.get((file_path, class_name))
    if not cskel:
        return head  # no children

    # 2) get the method node
    method_node = cskel.methods.get(method_name)
    if not method_node:
        return head

    # 3) extract calls
    calls = extract_calls_from_method(method_node, cskel, global_index)
    for call_info in calls:
        # create node description
        desc = None
        next_method_node = None

        if call_info['type'] == 'constructor':
            # e.g. "ClassA2.__init__"
            target = call_info['class']
            (t_file, t_class) = target
            desc = f"{t_class}.__init__"
            next_method_node = (t_file, t_class, '__init__')

        elif call_info['type'] == 'method':
            (t_file, t_class) = call_info['class']
            desc = f"{t_class}.{call_info['method']}"
            next_method_node = (t_file, t_class, call_info['method'])

        elif call_info['type'] == 'function':
            desc = f"Function: {call_info['name']}"
            next_method_node = None

        elif call_info['type'] == 'unknown':
            desc = f"Unknown call: {call_info.get('call', '')}"
            next_method_node = None

        new_node = Node(desc)
        current_node.next_node = new_node
        current_node = new_node

        # If we want to *inline* the chain from next_method_node (recursive), do so:
        if next_method_node and next_method_node not in visited:
            visited.add(next_method_node)
            sub_file, sub_class, sub_meth = next_method_node
            sub_chain = build_call_chain(sub_file, sub_class, sub_meth, global_index, visited)
            # Link the chain right after new_node
            # So new_node -> head of sub_chain -> ...
            # Then we skip ahead to the *end* of that sub_chain for continuing
            if sub_chain is not None and sub_chain.next_node is not None:
                new_node.next_node = sub_chain.next_node  # skip sub_chain's head because it's a duplicate name
                # walk to the end of sub_chain
                tmp = new_node
                while tmp.next_node:
                    tmp = tmp.next_node
                current_node = tmp

    return head


In [125]:
# Step 4: Call `build_call_chain` for `ClassA.method1` in `classA.py`
file_path = "classA.py"
class_name = "ClassA"
method_name = "method1"

In [129]:
global_index

{('classA.py', 'ClassA'): <__main__.ClassSkeleton at 0x721e08d29a80>,
 ('classA.py', 'ClassA2'): <__main__.ClassSkeleton at 0x721e08d2b430>,
 ('./classB.py', 'ClassB'): <__main__.ClassSkeleton at 0x721e08d2a440>,
 ('./classB.py', 'ClassB2'): <__main__.ClassSkeleton at 0x721e08d2b3a0>}

In [128]:
# Build the linked list representing the call chain
call_chain = build_call_chain(file_path, class_name, method_name, global_index)

In [127]:
# Step 5: Traverse and print the call chain
current_node = call_chain
while current_node is not None:
    print(current_node.description)
    current_node = current_node.next_node

ClassA.method1
ClassA2.__init__
ClassA2.__init__
ClassA2.method1
ClassB2.method3
