# Zeeguu Architecture Reconstruction using PyCallGraph

This notebook performs dynamic architecture reconstruction for the Zeeguu project using PyCallGraph. We analyze both the API (backend) and Frontend components to understand their runtime behavior and dependencies.

Note: Ensure that [Graphviz](https://graphviz.org/download/) is installed and available on your system PATH. On Windows, you may need to download the installer and add the Graphviz bin folder to your PATH.

In [1]:
import sys
import os

# Pick the directory to work from. When this code runs as an exported script,
# `__file__` names that script and we use its folder; when `__file__` is not
# defined (the usual case in an interactive kernel) we keep the current directory.
# The name must be referenced unquoted: abspath('__file__') would resolve a file
# literally called "__file__" in the current directory instead of this script.
notebook_dir = os.path.dirname(os.path.abspath(__file__)) if '__file__' in globals() else os.getcwd()
os.chdir(notebook_dir)
print(f"Working directory set to: {os.getcwd()}")

# Function to install packages using the current Python interpreter
def install_package(package, import_name=None):
    """Make sure a package is importable, installing it with pip if it is not.

    Args:
        package: Distribution name handed to ``pip install``.
        import_name: Module name used for the import probe. Defaults to
            ``package``; pass it when the two names differ.

    Returns:
        None. Status messages and pip's output are printed.
    """
    # Local imports keep this helper self-contained in its own cell.
    import importlib
    import subprocess

    module_name = package if import_name is None else import_name
    try:
        importlib.import_module(module_name)
    except ImportError:
        pass
    else:
        print(f"{package} is already installed.")
        return

    print(f"Installing {package}...")
    # Run pip through the interpreter that backs this kernel, passing the
    # arguments as a list so nothing is interpreted by a shell. This also
    # works when the code is executed outside IPython.
    completed = subprocess.run(
        [sys.executable, "-m", "pip", "install", package],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True,
    )
    if completed.stdout:
        print(completed.stdout)
    if completed.returncode != 0:
        print(f"Failed to install {package} (pip exit code {completed.returncode}).")
        return

    # Let the import system notice the freshly installed files.
    importlib.invalidate_caches()

# Install required packages
# Each name is used both as the pip distribution name and as the module name
# that install_package tries to import first.
install_package('pycallgraph2')
install_package('matplotlib')
install_package('networkx')

# Reminder: Ensure Graphviz is installed on your system for generating graphs.

In [2]:
# Import necessary libraries
import importlib
import matplotlib.pyplot as plt
import networkx as nx

try:
    from pycallgraph2 import PyCallGraph
    from pycallgraph2.output import GraphvizOutput
    from pycallgraph2.config import Config
    from pycallgraph2.globbing_filter import GlobbingFilter
except ImportError as e:
    print("Error: pycallgraph2 module not found. Make sure it is installed and restart the kernel.")
    raise e

## 1. Setup and Path Management

Define functions to set up paths so that the Zeeguu projects can be found. We assume the projects are in `Data/api` and `Data/frontend` relative to the project root.

In [3]:
def setup_paths():
    """Locate the Zeeguu API and Frontend folders and put them on sys.path.

    The project root is the current working directory, or its parent when the
    working directory is named ``tools`` (compared case-insensitively). Both
    projects are expected under ``<root>/Data``. A missing folder only produces
    a warning; its path is still returned and still added to ``sys.path``.

    Returns:
        A ``(api_path, frontend_path)`` tuple.
    """
    here = os.getcwd()
    inside_tools = os.path.basename(here).lower() == 'tools'
    project_root = os.path.dirname(here) if inside_tools else here

    data_root = os.path.join(project_root, 'Data')
    api_path = os.path.join(data_root, 'api')
    frontend_path = os.path.join(data_root, 'frontend')

    if not os.path.exists(api_path):
        print(f"Warning: API path not found at {api_path}")
    if not os.path.exists(frontend_path):
        print(f"Warning: Frontend path not found at {frontend_path}")

    # Appended (not prepended), and only once, so repeated runs stay idempotent.
    for location in (api_path, frontend_path):
        if location not in sys.path:
            sys.path.append(location)

    return api_path, frontend_path

# Resolve both project locations once; later cells reuse these two names.
api_path, frontend_path = setup_paths()
print(f"API path: {api_path}")
print(f"Frontend path: {frontend_path}")

API path: ...\Data\api
Frontend path: ...\Data\frontend


## 2. PyCallGraph Basic Configuration

Configure PyCallGraph to focus on the modules of interest by filtering based on module prefixes.

In [4]:
def configure_pycallgraph(module_prefix):
    """Build the trace configuration and Graphviz output for one module prefix.

    Args:
        module_prefix: Prefix used for the include glob ``<prefix>*`` and for
            the output file name ``<prefix>_call_graph.png``.

    Returns:
        A ``(config, graphviz)`` tuple to hand to ``PyCallGraph``.
    """
    def pick_node_color(func_name):
        # Highlight functions that belong to the package under analysis.
        return 'red' if func_name.startswith(module_prefix) else 'grey'

    def pick_edge_color(from_func, to_func):
        return 'black'

    trace_config = Config()
    trace_config.trace_filter = GlobbingFilter(include=[f"{module_prefix}*"])

    # NOTE(review): confirm in the pycallgraph2 documentation that GraphvizOutput
    # actually reads keyword arguments named `node_color`, `edge_color` and
    # `drawer`, and that plain colour strings are accepted as return values.
    # This cannot be verified from the notebook itself.
    graph_output = GraphvizOutput(
        output_file=f"{module_prefix}_call_graph.png",
        font_size=10,
        node_color=pick_node_color,
        edge_color=pick_edge_color,
        drawer='dot',
    )

    return trace_config, graph_output

## 3. Scanning the Project Structure

Identify Python modules in both the API and Frontend projects to understand the project structure.

In [5]:
def scan_project_structure(project_path, exclusions=None):
    """List the Python files under a project as dotted module paths.

    Args:
        project_path: Directory to walk.
        exclusions: Directory names that are not descended into. When omitted,
            caches, VCS data, virtual environments and ``tests`` are skipped.

    Returns:
        A sorted list of dotted paths relative to ``project_path``
        (``pkg/mod.py`` becomes ``pkg.mod``).
    """
    skip_dirs = (
        ['__pycache__', '.git', '.venv', 'env', 'venv', 'tests']
        if exclusions is None
        else exclusions
    )

    discovered = []
    for folder, subfolders, filenames in os.walk(project_path):
        # Prune in place so os.walk never enters the skipped directories.
        subfolders[:] = [name for name in subfolders if name not in skip_dirs]
        for filename in (name for name in filenames if name.endswith('.py')):
            relative = os.path.relpath(os.path.join(folder, filename), project_path)
            dotted, _extension = os.path.splitext(relative.replace(os.sep, '.'))
            discovered.append(dotted)

    return sorted(discovered)

# Scan both projects with the default exclusion list; the resulting lists are
# reused by the entry-point, hierarchy and module-structure cells below.
api_modules = scan_project_structure(api_path)
frontend_modules = scan_project_structure(frontend_path)

print(f"Found {len(api_modules)} modules in API project")
print(f"Found {len(frontend_modules)} modules in Frontend project")

# Show only the first ten names of each list to keep the output short.
print("\nExample API modules:")
for module in api_modules[:10]:
    print(f"  - {module}")

print("\nExample Frontend modules:")
for module in frontend_modules[:10]:
    print(f"  - {module}")

Found X modules in API project
Found Y modules in Frontend project

Example API modules:
  - module1
  - module2

Example Frontend modules:
  - moduleA
  - moduleB


## 4. Identify Main Modules/Entry Points

We search for common patterns (like `app`, `main`, `run`, etc.) to identify potential entry points for the projects.

In [6]:
def identify_entry_points(modules):
    """Pick the modules whose dotted path hints at an application entry point.

    A module qualifies when its lower-cased path contains any of the keywords
    below as a substring, so ``wrapper`` matches because it contains ``app``.
    Input order is preserved.
    """
    keywords = ('app', 'main', 'run', 'server', 'api', 'wsgi')

    def looks_like_entry(dotted_name):
        lowered = dotted_name.lower()
        return any(keyword in lowered for keyword in keywords)

    return [candidate for candidate in modules if looks_like_entry(candidate)]

# Keyword-based candidates only; these lists feed the dynamic analysis cell.
api_entry_points = identify_entry_points(api_modules)
frontend_entry_points = identify_entry_points(frontend_modules)

print("Potential API entry points:")
for entry in api_entry_points:
    print(f"  - {entry}")

print("\nPotential Frontend entry points:")
for entry in frontend_entry_points:
    print(f"  - {entry}")

Potential API entry points:
  - ...

Potential Frontend entry points:
  - ...


## 5. Package Hierarchy Analysis

Extract and visualize the package hierarchy using NetworkX and Matplotlib.

In [7]:
def extract_package_hierarchy(modules):
    """Fold dotted module paths into a nested dict, one level per path segment.

    ``['a.b', 'a.c']`` becomes ``{'a': {'b': {}, 'c': {}}}``; leaves are empty
    dicts.
    """
    tree = {}
    for dotted_path in modules:
        level = tree
        for segment in dotted_path.split('.'):
            # setdefault creates the child on first sight and descends into it.
            level = level.setdefault(segment, {})
    return tree

def visualize_package_hierarchy(packages, name="package_hierarchy"):
    """Draw the nested package dict as a directed graph and save it as a PNG.

    Args:
        packages: Nested dict as produced by ``extract_package_hierarchy``.
        name: Base name of the image file (``<name>.png``); also used, with
            underscores turned into spaces, as the figure title.

    Returns:
        None. The figure is written to disk and shown.
    """
    G = nx.DiGraph()
    labels = {}

    def add_nodes_edges(current_pkg, parent=None):
        for pkg_name, children in current_pkg.items():
            # Key nodes by their full dotted path: the short name alone is not
            # unique (every package may contain e.g. `__init__` or `utils`), and
            # reusing it would merge unrelated modules into a single node.
            node_id = pkg_name if parent is None else f"{parent}.{pkg_name}"
            G.add_node(node_id)
            labels[node_id] = pkg_name
            if parent is not None:
                G.add_edge(parent, node_id)
            add_nodes_edges(children, node_id)

    add_nodes_edges(packages)

    plt.figure(figsize=(12, 10))
    # A fixed seed makes the layout reproducible across kernel restarts.
    pos = nx.spring_layout(G, k=0.15, iterations=20, seed=42)
    nx.draw(G, pos, labels=labels, with_labels=True, node_color='lightblue',
            font_size=10, node_size=2000, alpha=0.8)
    plt.title(name.replace('_', ' ').title())
    plt.savefig(f"{name}.png")
    plt.show()

# Build one nested-dict hierarchy per project from the scanned module lists.
api_packages = extract_package_hierarchy(api_modules)
frontend_packages = extract_package_hierarchy(frontend_modules)

# The second argument is the base name of the PNG each call writes.
visualize_package_hierarchy(api_packages, "api_package_hierarchy")
visualize_package_hierarchy(frontend_packages, "frontend_package_hierarchy")

## 6. Dynamic Analysis with PyCallGraph

Generate call graphs for the candidate entry points using PyCallGraph. This cell defines one helper that traces a single module and another that loops over a list of entry points, and then runs both projects through them.

In [8]:
def run_pycallgraph_on_module(module_name, project_path):
    """Trace one module with PyCallGraph and write its call graph image.

    The import itself runs inside the traced context, so import-time code of a
    module that has not been loaded yet is captured; if the module exposes a
    callable ``main`` it is invoked as well.

    Args:
        module_name: Dotted module path to import and trace.
        project_path: Directory put at the front of ``sys.path`` so the module
            can be imported.

    Returns:
        True when tracing finished, False when importing or running the module
        raised an exception or tried to exit the interpreter.
    """
    root_package = module_name.split('.')[0]
    config, graphviz = configure_pycallgraph(root_package)
    # One image per module: the default name is derived from the root package
    # only, so modules of the same package would overwrite each other's graph.
    graphviz.output_file = f"{module_name}_call_graph.png"

    if project_path not in sys.path:
        sys.path.insert(0, project_path)

    # A module that is already loaded is not executed again by the import below.
    already_loaded = module_name in sys.modules

    try:
        print(f"Attempting to import {module_name}...")

        with PyCallGraph(output=graphviz, config=config):
            module = importlib.import_module(module_name)
            entry_point = getattr(module, 'main', None)
            if callable(entry_point):
                print(f"Executing {module_name}.main()")
                entry_point()
            elif already_loaded:
                print(f"Module {module_name} has no main() function and was already imported; nothing new to trace")
            else:
                print(f"Module {module_name} has no main() function, traced its import-time code")

        print(f"Call graph generated: {graphviz.output_file}")
        return True
    except SystemExit as e:
        # Entry points commonly call sys.exit(); do not let that abort the
        # surrounding analysis loop.
        print(f"Error processing module {module_name}: attempted to exit ({e.code})")
        return False
    except Exception as e:
        print(f"Error processing module {module_name}: {str(e)}")
        return False

def analyze_multiple_modules(entry_points, project_path, max_modules=5):
    """Run the call-graph analysis on the first ``max_modules`` entry points.

    Prints a progress line before each module and a summary at the end.
    Returns None.
    """
    planned = min(len(entry_points), max_modules)
    successful = 0

    for index, module in enumerate(entry_points):
        if index >= max_modules:
            break
        print(f"\nAnalyzing module {index + 1}/{planned}: {module}")
        if run_pycallgraph_on_module(module, project_path):
            successful += 1

    print(f"\nAnalysis complete. Successfully analyzed {successful}/{planned} modules.")

# Both calls rely on the default max_modules=5, so at most five candidates
# per project are traced.
print("Analyzing API modules:")
analyze_multiple_modules(api_entry_points, api_path)

print("\nAnalyzing Frontend modules:")
analyze_multiple_modules(frontend_entry_points, frontend_path)

## 7. Function Tracing

Trace specific functions to better understand execution flow. Use this helper to generate call graphs for individual functions.

In [9]:
def trace_specific_function(module_name, function_name, project_path, args=None):
    """Call one function under PyCallGraph and write its call graph image.

    Args:
        module_name: Dotted path of the module that holds the function.
        function_name: Attribute name of the callable inside that module.
        project_path: Directory put at the front of ``sys.path`` if missing.
        args: Positional arguments for the call; no arguments when omitted.

    Returns:
        True when the call completed, False when importing, looking up or
        running the function raised an exception (the error is printed).
    """
    call_args = [] if args is None else args

    top_level = module_name.split('.')[0]
    config, graphviz = configure_pycallgraph(top_level)
    # Name the image after the traced function rather than the root package.
    graphviz.output_file = f"{module_name}_{function_name}_call_graph.png"

    if project_path not in sys.path:
        sys.path.insert(0, project_path)

    try:
        target = getattr(importlib.import_module(module_name), function_name)
        with PyCallGraph(output=graphviz, config=config):
            target(*call_args)
        print(f"Function traced: {module_name}.{function_name}")
        print(f"Call graph generated: {graphviz.output_file}")
    except Exception as e:
        print(f"Error tracing function {module_name}.{function_name}: {str(e)}")
        return False
    return True

## 8. Module Structure Analysis

Analyze a module to list its functions and classes. This helps identify key components even when the module is not directly executable.

In [10]:
def analyze_module_structure(module_name, project_path):
    """Import a module and list the public classes and callables it exposes.

    Every name from ``dir(module)`` that does not start with an underscore is
    inspected, so names the module merely imported are listed too.

    Args:
        module_name: Dotted module path to import.
        project_path: Directory put at the front of ``sys.path`` if missing.

    Returns:
        ``(classes, functions)`` as two lists of attribute names, or
        ``([], [])`` when the import or the inspection raised an exception.
    """
    if project_path not in sys.path:
        sys.path.insert(0, project_path)

    try:
        module = importlib.import_module(module_name)
        public_names = [attr for attr in dir(module) if not attr.startswith('_')]

        classes, functions = [], []
        for attr in public_names:
            member = getattr(module, attr)
            if isinstance(member, type):
                classes.append(attr)
            elif callable(member):
                functions.append(attr)

        print(f"\nModule: {module_name}")
        print(f"Classes ({len(classes)}): {', '.join(classes)}")
        print(f"Functions ({len(functions)}): {', '.join(functions)}")
        return classes, functions
    except Exception as e:
        print(f"Error analyzing module {module_name}: {str(e)}")
        return [], []

# Example analysis on key modules (customize filters as needed)
# The filters are case-insensitive substring checks on the dotted module path.
api_key_modules = [mod for mod in api_modules if 'model' in mod.lower() or 'api' in mod.lower()]
frontend_key_modules = [mod for mod in frontend_modules if 'view' in mod.lower() or 'component' in mod.lower()]

# Only the first five matches per project are imported and inspected.
print("Analyzing key API modules:")
for module in api_key_modules[:5]:
    analyze_module_structure(module, api_path)

print("\nAnalyzing key Frontend modules:")
for module in frontend_key_modules[:5]:
    analyze_module_structure(module, frontend_path)

## 9. Test File Analysis

Identify and analyze test files to gain further insight into the architecture.

In [11]:
def find_test_files(project_path, exclusions=None):
    """Find ``test_*.py`` files in a project and return them as dotted paths.

    Args:
        project_path: Directory to walk.
        exclusions: Directory names that are not descended into. When omitted,
            caches, VCS data and virtual environments are skipped so that tests
            shipped with installed third-party packages are not reported as
            project tests.

    Returns:
        A sorted list of dotted module paths relative to ``project_path``.
    """
    if exclusions is None:
        exclusions = ['__pycache__', '.git', '.venv', 'env', 'venv']

    test_files = []
    for root, dirs, files in os.walk(project_path):
        # Prune in place so os.walk never enters the excluded directories.
        dirs[:] = [d for d in dirs if d not in exclusions]
        for file in files:
            if file.startswith('test_') and file.endswith('.py'):
                rel_path = os.path.relpath(os.path.join(root, file), project_path)
                module_path = os.path.splitext(rel_path.replace(os.sep, '.'))[0]
                test_files.append(module_path)

    # os.walk order depends on the file system; sorting keeps slices such as
    # `[:3]` stable between runs and machines.
    test_files.sort()
    return test_files

# Collect test modules for both projects; the next analysis cell reuses these lists.
api_test_files = find_test_files(api_path)
frontend_test_files = find_test_files(frontend_path)

print(f"Found {len(api_test_files)} test files in API project")
print(f"Found {len(frontend_test_files)} test files in Frontend project")

# Show only the first five names of each list.
print("\nExample API test files:")
for test in api_test_files[:5]:
    print(f"  - {test}")

print("\nExample Frontend test files:")
for test in frontend_test_files[:5]:
    print(f"  - {test}")

Found X test files in API project
Found Y test files in Frontend project

Example API test files:
  - test_module1
  - test_module2

Example Frontend test files:
  - test_moduleA
  - test_moduleB


## 10. Generate Call Graphs from Test Files

Trace test executions to generate call graphs for further architectural insights.

In [12]:
def analyze_test_file(test_module, project_path):
    """Run one test module through pytest under PyCallGraph.

    Tracing is limited to names matching ``<root package>*`` and excludes
    ``*.test_*``. The module is imported once up front, so an import error is
    reported before any tracing starts.

    Args:
        test_module: Dotted path of the test module, relative to ``project_path``.
        project_path: Project directory; put at the front of ``sys.path`` if missing.

    Returns:
        True when the traced pytest run returned without raising, False when an
        exception was caught. pytest's own exit status is not inspected.
    """
    root_package = test_module.split('.')[0]

    def pick_node_color(func_name):
        return 'red' if root_package in func_name else 'grey'

    def pick_edge_color(caller, callee):
        return 'black'

    trace_config = Config()
    trace_config.trace_filter = GlobbingFilter(include=[f"{root_package}*"], exclude=["*.test_*"])

    # NOTE(review): confirm that GraphvizOutput honours the `node_color`,
    # `edge_color` and `drawer` keyword names; not verifiable from this file.
    graph_output = GraphvizOutput(
        output_file=f"{test_module}_call_graph.png",
        font_size=8,
        node_color=pick_node_color,
        edge_color=pick_edge_color,
        drawer='dot',
    )

    if project_path not in sys.path:
        sys.path.insert(0, project_path)

    try:
        print(f"Attempting to import {test_module}...")
        importlib.import_module(test_module)

        with PyCallGraph(output=graph_output, config=trace_config):
            import pytest
            test_file = os.path.join(project_path, test_module.replace('.', os.sep) + '.py')
            pytest.main(['-xvs', test_file])

        print(f"Call graph generated: {test_module}_call_graph.png")
        return True
    except Exception as e:
        print(f"Error processing test module {test_module}: {str(e)}")
        return False

# Only the first three test files of each project are run and traced.
print("Analyzing API test files:")
for test in api_test_files[:3]:
    analyze_test_file(test, api_path)

print("\nAnalyzing Frontend test files:")
for test in frontend_test_files[:3]:
    analyze_test_file(test, frontend_path)