# Notebook 02: Evolution Network Analysis

**Objective**: Map and analyze evolution patterns in the Digimon Knowledge Graph.

This notebook explores:
- Evolution chain extraction and analysis
- Evolution metrics and statistics
- Temporal progression through levels
- Visualization of evolution networks and flows

---

## 1. Setup and Imports

In [1]:
# Standard imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
from collections import defaultdict, Counter
from pathlib import Path
import json
import warnings
warnings.filterwarnings('ignore')

# Custom utilities
from utils import (
    Neo4jConnector, 
    plot_network_interactive, plot_network_static,
    plot_evolution_sankey, save_figure,
    find_evolution_chains, calculate_basic_metrics,
    LEVEL_COLORS
)

# Display settings
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)

print("Environment setup complete!")

Environment setup complete!


## 2. Load Evolution Data

In [2]:
# Connect to database
conn = Neo4jConnector()
print("Connected to Neo4j database")

# Get all Digimon data
digimon_df = conn.get_all_digimon()
print(f"Loaded {len(digimon_df)} Digimon")

# Get evolution relationships
evolution_data = conn.get_evolution_chains()
print(f"\nLoaded {len(evolution_data)} evolution relationships")

Connected to Neo4j database
Loaded 1258 Digimon

Loaded 3746 evolution relationships


In [3]:
# Create evolution DataFrame
evolution_df = pd.DataFrame(evolution_data)
print("Evolution data sample:")
evolution_df.head(10)

Evolution data sample:


Unnamed: 0,from_digimon,to_digimon,type
0,DORIMON,DORUMON,
1,DORIMON,DODOMON,
2,DORIMON,DORUGAMON,
3,DORUMON,DORUGUREMON,
4,DORUMON,DORUGAMON,
5,DORUMON,HISYARYUMON,
6,DODOMON,DORUMON,
7,DODOMON,DORUGAMON,
8,DODOMON,DORIMON,
9,DORUGAMON,DORUGUREMON,


In [4]:
# Build evolution graph
G = nx.DiGraph()

# Add nodes with attributes
for _, digimon in digimon_df.iterrows():
    G.add_node(
        digimon['name_en'],
        level=digimon['level'],
        type=digimon['type'],
        attribute=digimon['attribute']
    )

# Add evolution edges
for _, evo in evolution_df.iterrows():
    G.add_edge(
        evo['from_digimon'],
        evo['to_digimon'],
        type=evo.get('type', 'evolution')
    )

print(f"\nEvolution graph created:")
print(f"  - Nodes: {G.number_of_nodes()}")
print(f"  - Edges: {G.number_of_edges()}")

ValueError: None cannot be a node

## 3. Evolution Chain Analysis

In [None]:
# Find all evolution chains
print("Finding evolution chains...")

# Find nodes with no incoming evolution edges (chain starts)
chain_starts = []
for node in G.nodes():
    if G.in_degree(node) == 0 and G.out_degree(node) > 0:
        chain_starts.append(node)

print(f"Found {len(chain_starts)} chain starting points")

# Build chains
all_chains = []

def build_chain(node, current_chain, visited):
    current_chain.append(node)
    visited.add(node)
    
    successors = list(G.successors(node))
    
    if not successors:
        # End of chain
        all_chains.append(current_chain.copy())
    else:
        for successor in successors:
            if successor not in visited:
                build_chain(successor, current_chain.copy(), visited.copy())

for start in chain_starts:
    build_chain(start, [], set())

print(f"\nFound {len(all_chains)} complete evolution chains")

In [None]:
# Analyze chain lengths
chain_lengths = [len(chain) for chain in all_chains]
length_distribution = Counter(chain_lengths)

print("=== EVOLUTION CHAIN LENGTH STATISTICS ===")
print(f"Average chain length: {np.mean(chain_lengths):.2f}")
print(f"Median chain length: {np.median(chain_lengths):.0f}")
print(f"Longest chain: {max(chain_lengths)}")
print(f"Shortest chain: {min(chain_lengths)}")

print("\nChain length distribution:")
for length, count in sorted(length_distribution.items()):
    print(f"  Length {length}: {count} chains")

In [None]:
# Find longest chains
longest_chains = [chain for chain in all_chains if len(chain) == max(chain_lengths)]
print(f"\n=== LONGEST EVOLUTION CHAINS ({max(chain_lengths)} stages) ===")
for i, chain in enumerate(longest_chains[:5]):
    print(f"\nChain {i+1}:")
    for j, digimon in enumerate(chain):
        level = G.nodes[digimon].get('level', 'Unknown')
        print(f"  {j+1}. {digimon} ({level})")

In [None]:
# Analyze branching patterns
branching_nodes = []
for node in G.nodes():
    out_degree = G.out_degree(node)
    if out_degree > 1:
        branching_nodes.append((node, out_degree))

branching_nodes.sort(key=lambda x: x[1], reverse=True)

print("=== TOP BRANCHING DIGIMON ===")
print("(Digimon with multiple evolution paths)\n")
for digimon, branches in branching_nodes[:10]:
    level = G.nodes[digimon].get('level', 'Unknown')
    print(f"{digimon} ({level}): {branches} evolution paths")

## 4. Level Transition Analysis

In [None]:
# Create level transition matrix
level_order = ['Baby', 'In-Training', 'Rookie', 'Champion', 'Ultimate', 'Mega', 'Ultra']
transition_matrix = pd.DataFrame(0, index=level_order, columns=level_order)

for edge in G.edges():
    from_node, to_node = edge
    from_level = G.nodes[from_node].get('level', 'Unknown')
    to_level = G.nodes[to_node].get('level', 'Unknown')
    
    if from_level in level_order and to_level in level_order:
        transition_matrix.loc[from_level, to_level] += 1

print("=== LEVEL TRANSITION MATRIX ===")
print(transition_matrix)

In [None]:
# Analyze skip evolutions
skip_evolutions = []
normal_evolutions = []

for edge in G.edges():
    from_node, to_node = edge
    from_level = G.nodes[from_node].get('level', 'Unknown')
    to_level = G.nodes[to_node].get('level', 'Unknown')
    
    if from_level in level_order and to_level in level_order:
        from_idx = level_order.index(from_level)
        to_idx = level_order.index(to_level)
        
        if to_idx - from_idx > 1:
            skip_evolutions.append((from_node, to_node, from_level, to_level))
        elif to_idx - from_idx == 1:
            normal_evolutions.append((from_node, to_node, from_level, to_level))

print(f"\nEvolution pattern analysis:")
print(f"  - Normal evolutions (next level): {len(normal_evolutions)}")
print(f"  - Skip evolutions (jump levels): {len(skip_evolutions)}")
print(f"  - Skip evolution rate: {len(skip_evolutions) / (len(normal_evolutions) + len(skip_evolutions)) * 100:.1f}%")

print("\nExample skip evolutions:")
for i, (from_d, to_d, from_l, to_l) in enumerate(skip_evolutions[:5]):
    print(f"  {from_d} ({from_l}) → {to_d} ({to_l})")

## 5. Type and Attribute Stability

In [None]:
# Analyze type changes during evolution
type_changes = []
type_stable = []

for edge in G.edges():
    from_node, to_node = edge
    from_type = G.nodes[from_node].get('type', 'Unknown')
    to_type = G.nodes[to_node].get('type', 'Unknown')
    
    if from_type != 'Unknown' and to_type != 'Unknown':
        if from_type == to_type:
            type_stable.append((from_node, to_node, from_type))
        else:
            type_changes.append((from_node, to_node, from_type, to_type))

type_stability_rate = len(type_stable) / (len(type_stable) + len(type_changes)) * 100

print("=== TYPE STABILITY ANALYSIS ===")
print(f"Type-stable evolutions: {len(type_stable)}")
print(f"Type-changing evolutions: {len(type_changes)}")
print(f"Type stability rate: {type_stability_rate:.1f}%")

# Most common type changes
type_change_patterns = Counter([(tc[2], tc[3]) for tc in type_changes])
print("\nMost common type changes:")
for (from_type, to_type), count in type_change_patterns.most_common(10):
    print(f"  {from_type} → {to_type}: {count} occurrences")

In [None]:
# Analyze attribute changes during evolution
attr_changes = []
attr_stable = []

for edge in G.edges():
    from_node, to_node = edge
    from_attr = G.nodes[from_node].get('attribute', 'Unknown')
    to_attr = G.nodes[to_node].get('attribute', 'Unknown')
    
    if from_attr != 'Unknown' and to_attr != 'Unknown':
        if from_attr == to_attr:
            attr_stable.append((from_node, to_node, from_attr))
        else:
            attr_changes.append((from_node, to_node, from_attr, to_attr))

attr_stability_rate = len(attr_stable) / (len(attr_stable) + len(attr_changes)) * 100

print("\n=== ATTRIBUTE STABILITY ANALYSIS ===")
print(f"Attribute-stable evolutions: {len(attr_stable)}")
print(f"Attribute-changing evolutions: {len(attr_changes)}")
print(f"Attribute stability rate: {attr_stability_rate:.1f}%")

# Attribute change patterns
attr_change_patterns = Counter([(ac[2], ac[3]) for ac in attr_changes])
print("\nAttribute change patterns:")
for (from_attr, to_attr), count in attr_change_patterns.most_common():
    print(f"  {from_attr} → {to_attr}: {count} occurrences")

## 6. Evolution Network Visualization

In [None]:
# Create a smaller subgraph for visualization (most connected component)
if nx.is_weakly_connected(G):
    largest_component = G
else:
    components = list(nx.weakly_connected_components(G))
    largest_component = G.subgraph(max(components, key=len)).copy()

print(f"Largest connected component: {largest_component.number_of_nodes()} nodes")

# Create visualization of a sample
if largest_component.number_of_nodes() > 100:
    # Sample for visualization
    central_nodes = sorted(nx.degree_centrality(largest_component).items(), 
                          key=lambda x: x[1], reverse=True)[:50]
    sample_nodes = [node for node, _ in central_nodes]
    
    # Add neighbors
    extended_nodes = set(sample_nodes)
    for node in sample_nodes:
        extended_nodes.update(G.predecessors(node))
        extended_nodes.update(G.successors(node))
    
    sample_graph = G.subgraph(list(extended_nodes)[:100]).copy()
else:
    sample_graph = largest_component

# Create static visualization
fig = plot_network_static(
    sample_graph,
    title="Evolution Network (Sample)",
    node_color_attr='level',
    layout='kamada_kawai',
    figsize=(15, 15)
)
save_figure(fig, "evolution_network_sample")
plt.show()

In [None]:
# Create level transition heatmap
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(transition_matrix, annot=True, fmt='d', cmap='YlOrRd', 
            cbar_kws={'label': 'Number of Evolutions'}, ax=ax)
ax.set_title('Level Transition Heatmap', fontsize=16, fontweight='bold')
ax.set_xlabel('To Level')
ax.set_ylabel('From Level')
plt.tight_layout()
save_figure(fig, "level_transition_heatmap")
plt.show()

In [None]:
# Create Sankey diagram for evolution flow
# Prepare data for top evolution paths
evolution_sample = evolution_df.head(100)  # Sample for visualization
fig = plot_evolution_sankey(
    evolution_sample.to_dict('records'),
    title="Evolution Flow Diagram (Sample)"
)
save_figure(fig, "evolution_sankey")
fig.show()

In [None]:
# Chain length distribution visualization
fig, ax = plt.subplots(figsize=(10, 6))
chain_length_counts = list(length_distribution.values())
chain_length_labels = list(length_distribution.keys())

bars = ax.bar(chain_length_labels, chain_length_counts, 
               color=sns.color_palette('viridis', len(chain_length_labels)))
ax.set_xlabel('Chain Length')
ax.set_ylabel('Number of Chains')
ax.set_title('Evolution Chain Length Distribution', fontsize=16, fontweight='bold')

# Add value labels
for bar in bars:
    height = bar.get_height()
    ax.text(bar.get_x() + bar.get_width()/2., height,
            f'{int(height)}', ha='center', va='bottom')

plt.tight_layout()
save_figure(fig, "chain_length_distribution")
plt.show()

## 7. Evolution Metrics Export

In [None]:
# Prepare evolution metrics
evolution_metrics = {
    'total_evolution_relationships': len(evolution_df),
    'total_chains': len(all_chains),
    'average_chain_length': np.mean(chain_lengths),
    'max_chain_length': max(chain_lengths),
    'min_chain_length': min(chain_lengths),
    'branching_nodes': len(branching_nodes),
    'type_stability_rate': type_stability_rate,
    'attribute_stability_rate': attr_stability_rate,
    'skip_evolution_rate': len(skip_evolutions) / (len(normal_evolutions) + len(skip_evolutions)) * 100
}

# Export results
results_dir = Path('../results/data')
results_dir.mkdir(parents=True, exist_ok=True)

# Save metrics
with open(results_dir / 'evolution_metrics.json', 'w') as f:
    json.dump(evolution_metrics, f, indent=2)

# Save transition matrix
transition_matrix.to_csv(results_dir / 'level_transition_matrix.csv')

# Save chain data
chain_data = []
for i, chain in enumerate(all_chains[:100]):  # Save first 100 chains
    chain_data.append({
        'chain_id': i,
        'length': len(chain),
        'chain': ' → '.join(chain)
    })
pd.DataFrame(chain_data).to_csv(results_dir / 'evolution_chains_sample.csv', index=False)

print("Evolution analysis results exported!")

## Summary and Key Findings

### Evolution Pattern Insights:

1. **Chain Structure**:
   - Most evolution chains follow predictable level progressions
   - Average chain length suggests multi-stage evolution is common
   - Some Digimon serve as major branching points with multiple evolution paths

2. **Level Transitions**:
   - Most evolutions follow sequential level progression
   - Skip evolutions (jumping levels) are relatively rare but significant
   - Certain levels show higher evolution activity

3. **Type/Attribute Stability**:
   - Types show moderate stability through evolution
   - Attributes tend to be more stable than types
   - Specific type/attribute changes follow patterns

4. **Network Characteristics**:
   - Evolution network shows hub-and-spoke patterns
   - Some Digimon are evolution "dead ends" with no further forms
   - Branching evolution creates complex relationship networks

These insights provide foundation for understanding Digimon evolution mechanics and relationships.

In [None]:
# Close database connection
conn.close()
print("Evolution analysis complete! Database connection closed.")