## Dependency depth

In [3]:
def analyze_dependency_depth_efficient(dependency_data):
    # Build minimal graph
    graph = {}
    for module, data in dependency_data.items():
        graph[module] = set(data.get("imports", [])) & set(dependency_data.keys())
    
    # Find max depth with bounded BFS
    def max_depth_for(module, max_depth_limit=10):
        visited = {module: 0}
        queue = [(module, 0)]
        
        while queue:
            current, depth = queue.pop(0)
            
            if depth >= max_depth_limit:
                continue
                
            for neighbor in graph.get(current, []):
                if neighbor not in visited or visited[neighbor] < depth + 1:
                    visited[neighbor] = depth + 1
                    queue.append((neighbor, depth + 1))
        
        return max(visited.values()) if visited else 0
    
    # Get depth for a sample of modules
    depth_results = {}
    # Take a sample of modules to analyze (e.g., top-level modules)
    sample_modules = [m for m in graph if '.' not in m][:20]  # Analyze up to 20 top-level modules
    
    for module in sample_modules:
        depth_results[module] = max_depth_for(module)
    
    return depth_results

## Cyclic Analysis

In [2]:
def find_cycles_efficient(dependency_data):
    graph = {}
    
    # Build a minimal graph representation
    for module, data in dependency_data.items():
        graph[module] = set(data.get("imports", [])) & set(dependency_data.keys())
    
    # Find cycles using iterative DFS
    def find_cycle_from(start):
        stack = [(start, [start])]
        cycles_found = []
        
        while stack and len(cycles_found) < 10:  # Limit to finding 10 cycles
            node, path = stack.pop()
            
            # Process each neighbor with a depth limit
            for neighbor in list(graph.get(node, []))[:20]:  # Limit neighbors to check
                if neighbor in path:
                    # Found a cycle
                    cycle_index = path.index(neighbor)
                    cycle = path[cycle_index:] + [neighbor]
                    if cycle not in cycles_found:
                        cycles_found.append(cycle)
                        if len(cycles_found) >= 10:
                            break
                elif len(path) < 20:  # Limit depth
                    stack.append((neighbor, path + [neighbor]))
        
        return cycles_found
    
    # Find some representative cycles
    all_cycles = []
    for module in list(graph.keys())[:50]:  # Limit starting nodes to check
        if len(all_cycles) >= 20:  # Limit total cycles to find
            break
        cycles = find_cycle_from(module)
        for cycle in cycles:
            if cycle not in all_cycles:
                all_cycles.append(cycle)
                if len(all_cycles) >= 20:
                    break
    
    return all_cycles

## Unused modules

In [4]:
def find_unused_and_disconnected_efficient(dependency_data):
    # Track which modules are imported by others
    imported_by_others = set()
    
    # Collection of modules that import something
    imports_something = set()
    
    # Single pass through data
    for module, data in dependency_data.items():
        imports = data.get("imports", [])
        
        if imports:
            imports_something.add(module)
            
        for imported in imports:
            if imported in dependency_data:
                imported_by_others.add(imported)
    
    # Get module sets
    all_modules = set(dependency_data.keys())
    
    # Find unused modules (not imported by any other module)
    unused_modules = all_modules - imported_by_others
    
    # Find disconnected modules (no imports and not imported by others)
    disconnected_modules = all_modules - (imports_something | imported_by_others)
    
    # Return limited-size results
    return {
        "unused_modules": list(unused_modules)[:30],  # Limit to 30 examples
        "disconnected_modules": list(disconnected_modules)[:30]  # Limit to 30 examples
    }

In [None]:
import json

try:
    # Load only a portion of the data if necessary
    with open('dependencies.json', 'r', encoding='utf-16') as f:
        dependency_data = json.load(f)
    
    # Option 1: Use smaller sample if full dataset is too large
    # if len(dependency_data) > 1000:
    #     # Take the first 1000 modules or create a meaningful sample
    #     sample_data = {k: dependency_data[k] for k in list(dependency_data.keys())[:1000]}
    #     print(f"Using a sample of {len(sample_data)} modules for analysis")
    #     dependency_data = sample_data
    
    # Find cyclic dependencies with limiting
    print("Finding cyclic dependencies...")
    cycles = find_cycles_efficient(dependency_data)
    print(f"Found {len(cycles)} cyclic dependencies in the sample:")
    for cycle in cycles[:5]:  # Show only the first 5
        print(" -> ".join(cycle))
    
    # Analyze dependency depth
    print("\nAnalyzing dependency depth...")
    depth_analysis = analyze_dependency_depth_efficient(dependency_data)
    # Print top modules with deepest dependencies
    print("\nModules with deepest dependency chains:")
    for module, depth in sorted(depth_analysis.items(), key=lambda x: x[1], reverse=True)[:5]:
        print(f"{module}: depth {depth}")
    
    # Find unused and disconnected modules
    print("\nFinding unused and disconnected modules...")
    unused_disconnected = find_unused_and_disconnected_efficient(dependency_data)
    print(f"\nUnused modules (sample): {len(unused_disconnected['unused_modules'])}")
    print(f"Examples: {', '.join(unused_disconnected['unused_modules'][:5])}")
    print(f"\nDisconnected modules (sample): {len(unused_disconnected['disconnected_modules'])}")
    print(f"Examples: {', '.join(unused_disconnected['disconnected_modules'][:5])}")
    
except MemoryError:
    print("Memory error encountered. Try using a smaller subset of your data or increasing memory limits.")

Finding cyclic dependencies...
Found 20 cyclic dependencies in the sample:
pydantic_core -> pydantic_core.core_schema -> pydantic_core
pydantic_core.core_schema -> pydantic_core -> pydantic_core.core_schema
pydantic.config -> pydantic.fields -> pydantic.config
pydantic -> pydantic.config -> pydantic.fields -> pydantic
pydantic.config -> pydantic.fields -> pydantic.json_schema -> pydantic.config

Analyzing dependency depth...

Modules with deepest dependency chains:
__main__: depth 10
pydantic: depth 10
pydantic_core: depth 10
rich: depth 10
toml: depth 10

Finding unused and disconnected modules...

Unused modules (sample): 1
Examples: __main__

Disconnected modules (sample): 0
Examples: 


In [6]:
import json

try:
    # Load only a portion of the data if necessary
    with open('dependencies.json', 'r', encoding='utf-16') as f:
        dependency_data = json.load(f)
    
    # Option 1: Use smaller sample if full dataset is too large
    # if len(dependency_data) > 1000:
    #     # Take the first 1000 modules or create a meaningful sample
    #     sample_data = {k: dependency_data[k] for k in list(dependency_data.keys())[:1000]}
    #     print(f"Using a sample of {len(sample_data)} modules for analysis")
    #     dependency_data = sample_data
    
    # Find cyclic dependencies with limiting
    print("Finding cyclic dependencies...")
    cycles = find_cycles_efficient(dependency_data)
    print(f"Found {len(cycles)} cyclic dependencies in the sample:")
    for cycle in cycles[:5]:  # Show only the first 5
        print(" -> ".join(cycle))
    
    # Analyze dependency depth
    print("\nAnalyzing dependency depth...")
    depth_analysis = analyze_dependency_depth_efficient(dependency_data)
    # Print top modules with deepest dependencies
    print("\nModules with deepest dependency chains:")
    for module, depth in sorted(depth_analysis.items(), key=lambda x: x[1], reverse=True)[:5]:
        print(f"{module}: depth {depth}")
    
    # Find unused and disconnected modules
    print("\nFinding unused and disconnected modules...")
    unused_disconnected = find_unused_and_disconnected_efficient(dependency_data)
    print(f"\nUnused modules (sample): {len(unused_disconnected['unused_modules'])}")
    print(f"Examples: {', '.join(unused_disconnected['unused_modules'][:5])}")
    print(f"\nDisconnected modules (sample): {len(unused_disconnected['disconnected_modules'])}")
    print(f"Examples: {', '.join(unused_disconnected['disconnected_modules'][:5])}")
    
except MemoryError:
    print("Memory error encountered. Try using a smaller subset of your data or increasing memory limits.")

Finding cyclic dependencies...
Found 20 cyclic dependencies in the sample:
pydantic_core -> pydantic_core.core_schema -> pydantic_core
pydantic_core.core_schema -> pydantic_core -> pydantic_core.core_schema
pydantic.config -> pydantic.fields -> pydantic.config
pydantic -> pydantic.config -> pydantic.fields -> pydantic
pydantic.config -> pydantic.fields -> pydantic.json_schema -> pydantic.config

Analyzing dependency depth...

Modules with deepest dependency chains:
__main__: depth 10
pydantic: depth 10
pydantic_core: depth 10
rich: depth 10
toml: depth 10

Finding unused and disconnected modules...

Unused modules (sample): 1
Examples: __main__

Disconnected modules (sample): 0
Examples: 


In [8]:
print("Found", len(cycles), "cycles in the dependency graph (cyclic dependencies):")
for cycle in cycles: 
    print(" -> ".join(cycle))

Found 20 cycles in the dependency graph (cyclic dependencies):
pydantic_core -> pydantic_core.core_schema -> pydantic_core
pydantic_core.core_schema -> pydantic_core -> pydantic_core.core_schema
pydantic.config -> pydantic.fields -> pydantic.config
pydantic -> pydantic.config -> pydantic.fields -> pydantic
pydantic.config -> pydantic.fields -> pydantic.json_schema -> pydantic.config
pydantic -> pydantic.config -> pydantic.fields -> pydantic.json_schema -> pydantic
pydantic -> pydantic.config -> pydantic.fields -> pydantic.json_schema -> pydantic.root_model -> pydantic
pydantic.fields -> pydantic.json_schema -> pydantic.root_model -> pydantic.fields
pydantic -> pydantic.config -> pydantic.fields -> pydantic.json_schema -> pydantic.root_model -> pydantic._internal._repr -> pydantic
pydantic -> pydantic.config -> pydantic.fields -> pydantic.json_schema -> pydantic.root_model -> pydantic._internal._repr -> pydantic._internal._typing_extra -> pydantic
pydantic._internal._repr -> pydantic._i

In [10]:
# Analyze dependency depth
print("\nAnalyzing dependency depth...")
depth_analysis = analyze_dependency_depth_efficient(dependency_data)
# Print top modules with deepest dependencies
# print("\nModules with deepest dependency chains:")
for module, depth in sorted(depth_analysis.items(), key=lambda x: x[1], reverse=True):
    print(f"{module}: depth {depth}")


Analyzing dependency depth...
__main__: depth 10
pydantic: depth 10
pydantic_core: depth 10
rich: depth 10
toml: depth 10
annotated_types: depth 1
dotenv: depth 0
email_validator: depth 0
typing_extensions: depth 0


In [12]:
def print_cycles_nicely(cycles):
    if not cycles:
        print("No cyclic dependencies found in the dependency graph.")
        return
        
    print(f"Found {len(cycles)} cycles in the dependency graph (cyclic dependencies):")
    print("-" * 80)
    
    for i, cycle in enumerate(cycles, 1):
        print(f"Cycle #{i}:")
        formatted_cycle = " → ".join(cycle)
        print(f"  {formatted_cycle}")
        print("-" * 80)

# Usage:
print_cycles_nicely(cycles)

Found 20 cycles in the dependency graph (cyclic dependencies):
--------------------------------------------------------------------------------
Cycle #1:
  pydantic_core → pydantic_core.core_schema → pydantic_core
--------------------------------------------------------------------------------
Cycle #2:
  pydantic_core.core_schema → pydantic_core → pydantic_core.core_schema
--------------------------------------------------------------------------------
Cycle #3:
  pydantic.config → pydantic.fields → pydantic.config
--------------------------------------------------------------------------------
Cycle #4:
  pydantic → pydantic.config → pydantic.fields → pydantic
--------------------------------------------------------------------------------
Cycle #5:
  pydantic.config → pydantic.fields → pydantic.json_schema → pydantic.config
--------------------------------------------------------------------------------
Cycle #6:
  pydantic → pydantic.config → pydantic.fields → pydantic.json_schema 

In [11]:
def print_cycles_vertical(cycles):
    if not cycles:
        print("No cyclic dependencies found in the dependency graph.")
        return
        
    print(f"Found {len(cycles)} cycles in the dependency graph (cyclic dependencies):")
    
    for i, cycle in enumerate(cycles, 1):
        print(f"\nCycle #{i}:")
        for j, module in enumerate(cycle):
            if j < len(cycle) - 1:
                print(f"  {module} ↓")
            else:
                print(f"  {module} → cycles back to {cycle[0]}")
        print("-" * 40)

# Usage:
print_cycles_vertical(cycles)

Found 20 cycles in the dependency graph (cyclic dependencies):

Cycle #1:
  pydantic_core ↓
  pydantic_core.core_schema ↓
  pydantic_core → cycles back to pydantic_core
----------------------------------------

Cycle #2:
  pydantic_core.core_schema ↓
  pydantic_core ↓
  pydantic_core.core_schema → cycles back to pydantic_core.core_schema
----------------------------------------

Cycle #3:
  pydantic.config ↓
  pydantic.fields ↓
  pydantic.config → cycles back to pydantic.config
----------------------------------------

Cycle #4:
  pydantic ↓
  pydantic.config ↓
  pydantic.fields ↓
  pydantic → cycles back to pydantic
----------------------------------------

Cycle #5:
  pydantic.config ↓
  pydantic.fields ↓
  pydantic.json_schema ↓
  pydantic.config → cycles back to pydantic.config
----------------------------------------

Cycle #6:
  pydantic ↓
  pydantic.config ↓
  pydantic.fields ↓
  pydantic.json_schema ↓
  pydantic → cycles back to pydantic
----------------------------------------