# Notebook 04: Move Network Analysis

**Objective**: Analyze special move relationships and patterns in the Digimon Knowledge Graph.

This notebook explores:
- Move statistics and distribution
- Move-based clustering and similarity
- Move evolution and inheritance patterns
- Network analysis of move relationships

---

## 1. Setup and Imports

In [None]:
# Standard imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
from collections import Counter, defaultdict
from sklearn.preprocessing import StandardScaler
# jaccard_score is exported by sklearn.metrics; sklearn.metrics.pairwise does
# not provide it, so importing it from there fails with ImportError and the
# whole setup cell aborts.
from sklearn.metrics import jaccard_score
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.cluster import AgglomerativeClustering
from scipy.cluster.hierarchy import dendrogram, linkage
from scipy.spatial.distance import pdist, squareform
import warnings

# NOTE: this silences every warning for the whole notebook session,
# including deprecation warnings from pandas/scipy/networkx.
warnings.filterwarnings('ignore')

# Custom utilities (project-local helpers)
from utils import (
    Neo4jConnector,
    plot_network_interactive, plot_network_static,
    plot_heatmap, save_figure,
    calculate_basic_metrics, find_cliques,
    TYPE_COLORS, LEVEL_COLORS
)

# Display settings: show every column, and up to 100 rows, when printing frames
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)

print("Environment setup complete!")

## 2. Load Move Data

In [None]:
# Connect to database
# Neo4jConnector is the project helper imported from utils; it is constructed
# without arguments here.
conn = Neo4jConnector()
print("Connected to Neo4j database")

# Get all Digimon data
# Later cells read the 'name_en', 'type', 'level' and 'attribute' columns of
# this frame.
digimon_df = conn.get_all_digimon()
print(f"Loaded {len(digimon_df)} Digimon")

# Get move data
# One row per Digimon-move relationship; the 'digimon' and 'move' columns are
# the only ones this notebook reads.
moves_df = conn.get_digimon_moves()
print(f"\nLoaded {len(moves_df)} Digimon-Move relationships")
print(f"Unique moves: {moves_df['move'].nunique()}")
print(f"Digimon with moves: {moves_df['digimon'].nunique()}")

In [None]:
# Display sample move data
# The bare expression on the last line is what the notebook renders as the
# cell output: the first 10 rows of moves_df.
print("Sample move data:")
moves_df.head(10)

## 3. Move Statistics

In [None]:
# Move frequency distribution: occurrences of each move in moves_df,
# ordered from most to least frequent.
move_counts = moves_df['move'].value_counts()

print("=== MOVE FREQUENCY STATISTICS ===")
print(f"Total unique moves: {len(move_counts)}")

mean_users = move_counts.mean()
median_users = move_counts.median()
print(f"Average Digimon per move: {mean_users:.2f}")
print(f"Median Digimon per move: {median_users:.0f}")

# value_counts() sorts descending, so position 0 holds the most common move.
top_move_name, top_move_users = move_counts.index[0], move_counts.iloc[0]
print(f"Most common move: {top_move_name} ({top_move_users} Digimon)")

single_user_moves = move_counts.eq(1).sum()
print(f"Rarest moves: {single_user_moves} moves used by only 1 Digimon")

print("\nTop 15 most common moves:")
for move_name, n_users in move_counts.iloc[:15].items():
    print(f"  {move_name}: {n_users} Digimon")

In [None]:
# Moves per Digimon statistics: number of (non-null) move rows per Digimon.
moves_grouped_by_digimon = moves_df.groupby('digimon')['move']
digimon_move_counts = moves_grouped_by_digimon.count()

print("\n=== MOVES PER DIGIMON STATISTICS ===")
print(f"Average moves per Digimon: {digimon_move_counts.mean():.2f}")
print(f"Median moves per Digimon: {digimon_move_counts.median():.0f}")

largest_moveset = digimon_move_counts.max()
largest_moveset_owner = digimon_move_counts.idxmax()
print(f"Max moves: {largest_moveset} (by {largest_moveset_owner})")
print(f"Min moves: {digimon_move_counts.min()}")

# How many Digimon have exactly k moves, ordered by k ascending.
move_count_dist = digimon_move_counts.value_counts().sort_index()
print("\nDistribution of move counts:")
for moveset_size, n_digimon in move_count_dist.iloc[:10].items():
    print(f"  {moveset_size} moves: {n_digimon} Digimon")

In [None]:
# Move diversity by type and level.
# Attach each move row to its Digimon's type/level/attribute (inner join on
# the English name).
digimon_info = digimon_df[['name_en', 'type', 'level', 'attribute']]
moves_with_info = pd.merge(
    moves_df,
    digimon_info,
    left_on='digimon',
    right_on='name_en',
)

# Head-counts per type and per level, computed once instead of re-filtering
# digimon_df inside each loop iteration.
digimon_per_type = digimon_df['type'].value_counts()
digimon_per_level = digimon_df['level'].value_counts()

# Distinct moves seen within each type, largest first.
type_move_diversity = (
    moves_with_info.groupby('type')['move']
    .nunique()
    .sort_values(ascending=False)
)
print("\n=== MOVE DIVERSITY BY TYPE (Top 15) ===")
for type_name, unique_moves in type_move_diversity.iloc[:15].items():
    type_count = digimon_per_type.get(type_name, 0)
    print(f"  {type_name}: {unique_moves} unique moves ({type_count} Digimon)")

# Distinct moves seen within each level, reported in evolution order; levels
# without any move rows show up as 0.
level_order = ['Baby', 'In-Training', 'Rookie', 'Champion', 'Ultimate', 'Mega', 'Ultra']
level_move_diversity = (
    moves_with_info.groupby('level')['move']
    .nunique()
    .reindex(level_order, fill_value=0)
)

print("\n=== MOVE DIVERSITY BY LEVEL ===")
for level, unique_moves in level_move_diversity.items():
    level_count = digimon_per_level.get(level, 0)
    print(f"  {level}: {unique_moves} unique moves ({level_count} Digimon)")

## 4. Move-based Clustering

In [None]:
# Create Digimon-Move matrix for similarity analysis.
# pd.crosstab counts (digimon, move) occurrences directly. The previous
# pivot_table call used 'move' both as `columns` and as `values`, which makes
# pandas group by a column it is also asked to aggregate and does not
# reliably produce the intended count table.
digimon_move_matrix = pd.crosstab(moves_df['digimon'], moves_df['move'])

# Convert to binary (has move or not); duplicate rows collapse to a single 1.
digimon_move_binary = (digimon_move_matrix > 0).astype(int)

print(f"Digimon-Move matrix shape: {digimon_move_binary.shape}")

# Share of zero cells. Guard against an empty matrix so the cell reports the
# situation instead of dividing by zero.
total_cells = digimon_move_binary.size
if total_cells:
    zero_cells = int((digimon_move_binary == 0).to_numpy().sum())
    print(f"Sparsity: {zero_cells / total_cells * 100:.2f}%")
else:
    print("Sparsity: n/a (matrix is empty)")

In [None]:
# Jaccard similarity between Digimon, based on their binary move vectors.
# Only a reproducible random sample (at most 100 rows) is compared, to keep
# the pairwise computation small.
sample_size = min(100, len(digimon_move_binary))
sample_digimon = digimon_move_binary.sample(n=sample_size, random_state=42)

# pdist returns the condensed distance vector; squareform expands it to a
# full symmetric matrix, and similarity is its complement.
jaccard_distances = pdist(sample_digimon.values, metric='jaccard')
jaccard_similarity = 1 - squareform(jaccard_distances)

sampled_names = sample_digimon.index
similarity_df = pd.DataFrame(
    jaccard_similarity,
    index=sampled_names,
    columns=sampled_names
)

print(f"Calculated Jaccard similarity for {sample_size} Digimon")

# Keep the strict upper triangle only, so each unordered pair appears once
# and the diagonal (self-similarity) is excluded.
strict_upper_mask = np.triu(np.ones(similarity_df.shape), k=1).astype(bool)
similarity_upper = similarity_df.where(strict_upper_mask)
similar_pairs = similarity_upper.stack().sort_values(ascending=False)

print("\n=== MOST SIMILAR DIGIMON (by moves) ===")
for (dig1, dig2), similarity in similar_pairs.iloc[:10].items():
    first_moveset = set(moves_df.loc[moves_df['digimon'] == dig1, 'move'])
    second_moveset = set(moves_df.loc[moves_df['digimon'] == dig2, 'move'])
    shared_moves = first_moveset.intersection(second_moveset)
    print(f"{dig1} - {dig2}: {similarity:.3f} similarity ({len(shared_moves)} shared moves)")

In [None]:
# Hierarchical clustering based on moves
from scipy.cluster.hierarchy import fcluster

# Ward (like centroid/median) linkage is only well-defined for Euclidean
# distances. The input here is a condensed Jaccard distance vector, so use
# average linkage, which is valid for arbitrary dissimilarities.
linkage_matrix = linkage(jaccard_distances, method='average')

# Create dendrogram
fig, ax = plt.subplots(figsize=(15, 8))
dendrogram(linkage_matrix, labels=sample_digimon.index.tolist(),
           leaf_rotation=90, leaf_font_size=8, ax=ax)
ax.set_title('Digimon Clustering by Move Similarity (Sample)', fontsize=16, fontweight='bold')
ax.set_xlabel('Digimon')
ax.set_ylabel('Distance')
plt.tight_layout()
save_figure(fig, "digimon_move_dendrogram")
plt.show()

# Extract clusters by cutting the SAME tree that the dendrogram shows.
# Previously a separate Ward AgglomerativeClustering was fitted on the
# distance matrix as if its rows were feature vectors, so the labels did not
# correspond to the plotted hierarchy.
n_clusters = min(5, len(sample_digimon))
# fcluster labels start at 1; shift to 0-based to keep the exported format.
cluster_labels = fcluster(linkage_matrix, t=n_clusters, criterion='maxclust') - 1

# Analyze clusters
cluster_df = pd.DataFrame({
    'digimon': sample_digimon.index,
    'cluster': cluster_labels
})

print(f"\n=== MOVE-BASED CLUSTERS ===")
# 'maxclust' may return fewer clusters than requested, so iterate over the
# labels that actually occur rather than over range(n_clusters).
for cluster_id in sorted(cluster_df['cluster'].unique()):
    cluster_members = cluster_df[cluster_df['cluster'] == cluster_id]['digimon'].tolist()
    print(f"\nCluster {cluster_id + 1} ({len(cluster_members)} members):")
    print(f"  Members: {', '.join(cluster_members[:5])}{'...' if len(cluster_members) > 5 else ''}")

    # Find characteristic moves for this cluster: moves whose row count
    # exceeds half the cluster size (at most 5 are listed).
    cluster_moves = moves_df[moves_df['digimon'].isin(cluster_members)]['move'].value_counts()
    cluster_move_pct = cluster_moves / len(cluster_members)
    characteristic_moves = cluster_move_pct[cluster_move_pct > 0.5].head(5)

    if len(characteristic_moves) > 0:
        print(f"  Characteristic moves:")
        for move, pct in characteristic_moves.items():
            print(f"    - {move}: {pct:.1%} of cluster")

## 5. Move Evolution Analysis

In [None]:
# Get evolution data
evolution_data = conn.get_evolution_chains()
evolution_df = pd.DataFrame(evolution_data)

# Build the digimon -> set(moves) lookup once, instead of re-filtering
# moves_df twice for every evolution edge.
moves_by_digimon = defaultdict(set)
for digimon_name, move_name in zip(moves_df['digimon'], moves_df['move']):
    moves_by_digimon[digimon_name].add(move_name)

# An empty evolution table may come back without any columns; treat that as
# "no edges" rather than failing on the column lookup.
if evolution_df.empty:
    evolution_pairs = []
else:
    evolution_pairs = list(zip(evolution_df['from_digimon'], evolution_df['to_digimon']))

# Analyze move inheritance through evolution
inheritance_columns = [
    'from_digimon', 'to_digimon',
    'total_from_moves', 'total_to_moves',
    'inherited_moves', 'new_moves', 'lost_moves',
    'inheritance_rate',
]
move_inheritance = []

for from_digimon, to_digimon in evolution_pairs:
    # .get() avoids inserting empty entries into the defaultdict
    from_moves = moves_by_digimon.get(from_digimon, set())
    to_moves = moves_by_digimon.get(to_digimon, set())

    # Only pairs where both sides have recorded moves can be compared
    if not (from_moves and to_moves):
        continue

    inherited_moves = from_moves & to_moves
    move_inheritance.append({
        'from_digimon': from_digimon,
        'to_digimon': to_digimon,
        'total_from_moves': len(from_moves),
        'total_to_moves': len(to_moves),
        'inherited_moves': len(inherited_moves),
        'new_moves': len(to_moves - from_moves),
        'lost_moves': len(from_moves - to_moves),
        # from_moves is non-empty here, so the division is safe
        'inheritance_rate': len(inherited_moves) / len(from_moves),
    })

# Passing the column list keeps the schema stable even when no pair qualified.
inheritance_df = pd.DataFrame(move_inheritance, columns=inheritance_columns)

print("=== MOVE EVOLUTION PATTERNS ===")
print(f"Evolution pairs analyzed: {len(inheritance_df)}")

if inheritance_df.empty:
    print("\nNo evolution pair has move data on both sides; nothing to summarise.")
else:
    print(f"\nAverage move inheritance rate: {inheritance_df['inheritance_rate'].mean():.2%}")
    print(f"Average new moves gained: {inheritance_df['new_moves'].mean():.1f}")
    print(f"Average moves lost: {inheritance_df['lost_moves'].mean():.1f}")

    # Examples of high inheritance (pairs that inherited nothing are skipped)
    high_inheritance = inheritance_df.nlargest(10, 'inheritance_rate')
    print("\nEvolutions with highest move inheritance:")
    for row in high_inheritance.itertuples(index=False):
        if row.inheritance_rate > 0:
            print(f"  {row.from_digimon} → {row.to_digimon}: "
                  f"{row.inheritance_rate:.0%} inherited ({row.inherited_moves}/{row.total_from_moves} moves)")

In [None]:
# Analyze signature moves (moves that persist across a direct evolution)

# Map each Digimon to the list of Digimon it evolves into directly. Only
# single evolution steps are examined below; multi-stage chains are not
# followed.
evolution_chains = defaultdict(list)
if not evolution_df.empty:
    for from_name, to_name in zip(evolution_df['from_digimon'], evolution_df['to_digimon']):
        evolution_chains[from_name].append(to_name)

# Build the digimon -> set(moves) lookup once, instead of re-filtering
# moves_df for every evolution edge.
moves_by_digimon = defaultdict(set)
for digimon_name, move_name in zip(moves_df['digimon'], moves_df['move']):
    moves_by_digimon[digimon_name].add(move_name)

# persistent_moves[m]: evolutions where both source and target know m.
# persistence_opportunities[m]: evolutions whose source knows m and whose
# target has any recorded moves (i.e. persistence could be observed).
persistent_moves = Counter()
persistence_opportunities = Counter()
chain_count = 0

for start_digimon, evolutions in evolution_chains.items():
    start_moves = moves_by_digimon.get(start_digimon, set())

    for evo_digimon in evolutions:
        chain_count += 1
        evo_moves = moves_by_digimon.get(evo_digimon, set())
        if not evo_moves:
            continue

        persistence_opportunities.update(start_moves)
        persistent_moves.update(start_moves & evo_moves)

print("\n=== SIGNATURE MOVES (Most Persistent) ===")
print(f"Analyzed {chain_count} evolution relationships")
print("\nMoves that persist through evolution:")
for move, count in persistent_moves.most_common(15):
    # The rate is relative to evolutions that start from a user of the move,
    # so it is bounded by 100%. Dividing the edge count by the number of
    # Digimon using the move (as before) could exceed 100% on branching
    # evolutions and did not measure "share of users".
    persistence_rate = count / persistence_opportunities[move]
    print(f"  {move}: persists in {count} evolutions "
          f"({persistence_rate:.1%} of evolutions from its users)")

## 6. Move Network Analysis

In [None]:
# Create move co-occurrence network
from itertools import combinations

# Minimum number of Digimon that must share a pair of moves for the pair to
# become an edge.
MIN_COOCCURRENCE = 5

move_network = nx.Graph()

# Add nodes (moves); missing values are not real moves, so skip them
all_moves = moves_df['move'].dropna().unique()
move_network.add_nodes_from(all_moves)

# Count, for every unordered pair of moves, how many Digimon have both.
# A single groupby pass replaces re-filtering moves_df once per Digimon, and
# de-duplicating each moveset prevents repeated rows from producing
# self-loops or counting the same Digimon twice for one pair.
move_cooccurrence = defaultdict(int)

for _, digimon_moves in moves_df.groupby('digimon', sort=False)['move']:
    unique_moves = sorted(set(digimon_moves.dropna()))
    # combinations() over a sorted list yields each pair once, already ordered
    for move_pair in combinations(unique_moves, 2):
        move_cooccurrence[move_pair] += 1

# Add edges with weight = co-occurrence count
for (move1, move2), count in move_cooccurrence.items():
    if count >= MIN_COOCCURRENCE:
        move_network.add_edge(move1, move2, weight=count)

print(f"Move network created:")
print(f"  Nodes (moves): {move_network.number_of_nodes()}")
print(f"  Edges (co-occurrences): {move_network.number_of_edges()}")

# Basic network metrics (computed by the project helper from utils)
network_metrics = calculate_basic_metrics(move_network)
print(f"\nNetwork metrics:")
print(f"  Density: {network_metrics['density']:.3f}")
print(f"  Average degree: {network_metrics['average_degree']:.2f}")
print(f"  Connected components: {network_metrics['num_components']}")

In [None]:
# Find move communities (moves that often appear together)
# Get largest connected component; default=set() keeps an empty network from
# raising ValueError in max().
largest_component = max(nx.connected_components(move_network), key=len, default=set())
move_subgraph = move_network.subgraph(largest_component).copy()

# Find cliques (groups of moves that all co-occur)
move_cliques = find_cliques(move_subgraph, min_size=3)

print("=== MOVE COMMUNITIES (Cliques) ===")
print(f"Found {len(move_cliques)} cliques of size 3+\n")

# Build the digimon -> set(moves) lookup once (in first-appearance order),
# instead of re-filtering moves_df for every Digimon inside every clique.
moves_by_digimon = defaultdict(set)
known_rows = moves_df.dropna(subset=['digimon', 'move'])
for digimon_name, move_name in zip(known_rows['digimon'], known_rows['move']):
    moves_by_digimon[digimon_name].add(move_name)

# Display largest cliques
sorted_cliques = sorted(move_cliques, key=len, reverse=True)
for i, clique in enumerate(sorted_cliques[:10]):
    clique_moves = list(clique)
    # Normalise to a set so the subset test works whether the helper returns
    # sets, lists or tuples.
    clique_set = set(clique_moves)
    print(f"Clique {i+1} ({len(clique_moves)} moves): {', '.join(clique_moves)}")

    # Find Digimon that have all these moves
    digimon_with_all = [
        digimon for digimon, digimon_moves in moves_by_digimon.items()
        if clique_set <= digimon_moves
    ]

    if digimon_with_all:
        print(f"  Used by: {', '.join(digimon_with_all[:5])}{'...' if len(digimon_with_all) > 5 else ''}")
    print()

In [None]:
# Identify central moves in the network
degree_centrality = nx.degree_centrality(move_network)

# networkx interprets the `weight` passed to betweenness_centrality as a
# path length (distance). The 'weight' edge attribute here is a co-occurrence
# count, i.e. a tie strength, so using it directly would route shortest
# paths around the strongest associations. Store the reciprocal as a
# separate 'distance' attribute and use that instead; edges without a weight
# count as 1, matching networkx's default.
for _, _, edge_data in move_network.edges(data=True):
    edge_data['distance'] = 1.0 / edge_data.get('weight', 1)
betweenness_centrality = nx.betweenness_centrality(move_network, weight='distance')

# Sort by centrality (highest first) and keep the top 10 of each
top_degree_moves = sorted(degree_centrality.items(), key=lambda x: x[1], reverse=True)[:10]
top_betweenness_moves = sorted(betweenness_centrality.items(), key=lambda x: x[1], reverse=True)[:10]

print("=== CENTRAL MOVES IN NETWORK ===")
print("\nMost connected moves (high degree centrality):")
for move, centrality in top_degree_moves:
    connections = move_network.degree(move)
    print(f"  {move}: {connections} connections (centrality: {centrality:.3f})")

print("\nBridge moves (high betweenness centrality):")
for move, centrality in top_betweenness_moves:
    print(f"  {move}: betweenness {centrality:.3f}")

## 7. Visualizations

In [None]:
# Move frequency figure: histogram of usage counts (left) and the 15 most
# common moves (right).
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(15, 6))

# Left panel: how many moves are used by k Digimon (log-scaled counts)
ax1.hist(move_counts.values, bins=50, edgecolor='black', alpha=0.7)
ax1.set_title('Move Frequency Distribution', fontsize=14, fontweight='bold')
ax1.set_xlabel('Number of Digimon using move')
ax1.set_ylabel('Number of moves')
ax1.set_yscale('log')

# Right panel: bar chart of the most common moves
top_moves = move_counts.head(15)
n_top = len(top_moves)
bar_positions = range(n_top)
bar_colors = sns.color_palette('viridis', n_top)
bars = ax2.bar(bar_positions, top_moves.values, color=bar_colors)
ax2.set_xticks(bar_positions)
ax2.set_xticklabels(top_moves.index, rotation=45, ha='right')
ax2.set_title('Top 15 Most Common Moves', fontsize=14, fontweight='bold')
ax2.set_ylabel('Number of Digimon')

# Write each bar's value just above it, centred horizontally
for rect in bars:
    bar_top = rect.get_height()
    bar_centre = rect.get_x() + rect.get_width()/2.
    ax2.text(bar_centre, bar_top, f'{int(bar_top)}',
             ha='center', va='bottom', fontsize=8)

plt.tight_layout()
save_figure(fig, "move_frequency_analysis")
plt.show()

In [None]:
# Bar chart of the average number of move rows per Digimon at each level.
fig, ax = plt.subplots(figsize=(10, 6))

# Average = move rows in the level / distinct Digimon in the level; levels
# missing from the data are plotted as 0.
rows_by_level = moves_with_info.groupby('level')
moves_per_level = rows_by_level['move'].count()
digimon_per_level_with_moves = rows_by_level['digimon'].nunique()
avg_moves_by_level = (moves_per_level / digimon_per_level_with_moves).reindex(
    level_order, fill_value=0
)

# Fall back to grey for any level without a configured colour
colors = [LEVEL_COLORS.get(level_name, '#808080') for level_name in level_order]
bars = ax.bar(level_order, avg_moves_by_level.values, color=colors)

ax.set_title('Move Complexity by Evolution Level', fontsize=16, fontweight='bold')
ax.set_xlabel('Level')
ax.set_ylabel('Average Moves per Digimon')

# Write each bar's value just above it, centred horizontally
for rect in bars:
    bar_top = rect.get_height()
    bar_centre = rect.get_x() + rect.get_width()/2.
    ax.text(bar_centre, bar_top, f'{bar_top:.1f}', ha='center', va='bottom')

plt.tight_layout()
save_figure(fig, "move_complexity_by_level")
plt.show()

In [None]:
# Heatmap of pairwise Jaccard similarity for the first 30 sampled Digimon.
fig, ax = plt.subplots(figsize=(12, 10))

# Restrict to a 30x30 corner so the tick labels stay legible
sample_sim = similarity_df.iloc[:30, :30]

heatmap_options = dict(
    cmap='YlOrRd',
    square=True,
    cbar_kws={'label': 'Jaccard Similarity'},
    xticklabels=True,
    yticklabels=True,
)
sns.heatmap(sample_sim, ax=ax, **heatmap_options)

ax.set_title('Digimon Move Similarity Matrix (Sample)', fontsize=16, fontweight='bold')

# Vertical names on the x axis, horizontal names on the y axis
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(90)
    tick_label.set_ha('right')
for tick_label in ax.get_yticklabels():
    tick_label.set_rotation(0)

plt.tight_layout()
save_figure(fig, "move_similarity_heatmap")
plt.show()

In [None]:
# Static drawing of the co-occurrence network, limited to the 50 moves with
# the highest degree centrality.
moves_by_centrality = sorted(
    degree_centrality, key=degree_centrality.get, reverse=True
)
top_degree_nodes = moves_by_centrality[:50]

# Independent copy of the induced subgraph (rebinds move_subgraph)
move_subgraph = move_network.subgraph(top_degree_nodes).copy()

# Render with the project's plotting helper and save the figure
network_plot_options = {
    'title': "Move Co-occurrence Network (Top 50 Moves)",
    'layout': 'spring',
    'figsize': (15, 15),
}
fig = plot_network_static(move_subgraph, **network_plot_options)
save_figure(fig, "move_cooccurrence_network")
plt.show()

In [None]:
# Inheritance figure: distribution of inheritance rates (left) and average
# inherited / new / lost move counts per evolution (right).
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(15, 6))

# Left panel: histogram of per-pair inheritance rates with the mean marked
inheritance_rates = inheritance_df['inheritance_rate']
mean_inheritance_rate = inheritance_rates.mean()
ax1.hist(inheritance_rates, bins=20, edgecolor='black', alpha=0.7)
ax1.set_title('Distribution of Move Inheritance Rates', fontsize=14, fontweight='bold')
ax1.set_xlabel('Move Inheritance Rate')
ax1.set_ylabel('Number of Evolution Pairs')
ax1.axvline(mean_inheritance_rate, color='red',
           linestyle='--', label=f'Mean: {mean_inheritance_rate:.2%}')
ax1.legend()

# Right panel: one bar per category, all in a single row labelled 'Moves'
category_columns = {
    'Inherited': 'inherited_moves',
    'New': 'new_moves',
    'Lost': 'lost_moves',
}
evolution_changes = pd.DataFrame(
    {label: inheritance_df[column].mean() for label, column in category_columns.items()},
    index=['Moves'],
)

evolution_changes.plot(kind='bar', ax=ax2, color=['green', 'blue', 'red'])
ax2.set_title('Average Move Changes During Evolution', fontsize=14, fontweight='bold')
ax2.set_ylabel('Average Number of Moves')
# Hide the single, uninformative x tick label
ax2.set_xticklabels([''], rotation=0)
ax2.legend(loc='upper right')

# Annotate every bar with its value
for bar_group in ax2.containers:
    ax2.bar_label(bar_group, fmt='%.1f')

plt.tight_layout()
save_figure(fig, "move_evolution_analysis")
plt.show()

## 8. Export Results

In [None]:
# Write the notebook's summary statistics and tables to ../results/data.
from pathlib import Path
import json

# Create the output directory (and any missing parents) if needed
results_dir = Path('../results/data')
results_dir.mkdir(parents=True, exist_ok=True)

# Headline numbers, cast to built-in types so they are JSON-serialisable
mean_inheritance = inheritance_df['inheritance_rate'].mean()
move_statistics = {
    'total_unique_moves': int(moves_df['move'].nunique()),
    'total_digimon_with_moves': int(moves_df['digimon'].nunique()),
    'total_move_relationships': len(moves_df),
    'avg_moves_per_digimon': float(digimon_move_counts.mean()),
    'max_moves_per_digimon': int(digimon_move_counts.max()),
    'most_common_move': move_counts.index[0],
    'rare_moves_count': int((move_counts == 1).sum()),
    'avg_move_inheritance_rate': float(mean_inheritance),
    'move_network_density': float(network_metrics['density']),
    'move_network_components': int(network_metrics['num_components'])
}

# Statistics as pretty-printed JSON
statistics_path = results_dir / 'move_statistics.json'
with statistics_path.open('w') as stats_file:
    stats_file.write(json.dumps(move_statistics, indent=2))

# Move frequencies as a two-column table; the rename covers pandas versions
# where reset_index() names the former index column 'index'
move_frequency_table = (
    move_counts.to_frame('count')
    .reset_index()
    .rename(columns={'index': 'move'})
)
move_frequency_table.to_csv(results_dir / 'move_frequencies.csv', index=False)

# Per-evolution inheritance table and per-Digimon cluster assignments
inheritance_df.to_csv(results_dir / 'move_inheritance_analysis.csv', index=False)
cluster_df.to_csv(results_dir / 'move_based_clusters.csv', index=False)

# Top moves by degree centrality, with their betweenness alongside
# (0 when a move has no betweenness entry)
central_moves_df = pd.DataFrame(
    [
        {
            'move': move_name,
            'degree_centrality': degree_value,
            'betweenness_centrality': betweenness_centrality.get(move_name, 0),
        }
        for move_name, degree_value in top_degree_moves
    ],
    columns=['move', 'degree_centrality', 'betweenness_centrality'],
)
central_moves_df.to_csv(results_dir / 'central_moves.csv', index=False)

print("Move analysis results exported successfully!")

## Summary and Key Findings

### Move Network Insights:

1. **Move Distribution**:
   - Wide variety of moves with heavy-tailed distribution
   - Most moves are unique or used by few Digimon
   - Some universal moves appear across many Digimon

2. **Move-based Similarity**:
   - Digimon cluster into groups based on shared movesets
   - Move similarity may correlate with type/level (suggested by the diversity statistics; not measured directly in this notebook)
   - Certain move combinations frequently co-occur

3. **Evolution Patterns**:
   - Moderate move inheritance through evolution
   - Evolution typically adds new moves while retaining some core moves
   - Signature moves persist through evolution chains

4. **Network Structure**:
   - Move co-occurrence network shows community structure
   - Central moves act as bridges between different move groups
   - Cliques reveal standard move combinations

These insights reveal the complexity of the move system and how it relates to Digimon identity and evolution.

In [None]:
# Close database connection
# Final cell: releases the connector created in the data-loading cell; any
# cell that uses `conn` must be run before this one.
conn.close()
print("Move network analysis complete! Database connection closed.")