# 04: Semantic Network Exploration

**Thematic Clusters in Game Discourse**

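This notebook builds the co-occurrence network of characters (entities) and themes, detects communities within it, and then examines network metrics, the co-occurrence heatmap, the strongest co-occurring pairs, and the degree distribution.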

In [None]:
# Put ../src (a relative path) at the front of the module search path so the
# project-local modules imported below can be found.
import sys
sys.path.insert(0, '../src')

import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
# Project-local modules (presumably located in ../src — confirm).
from experiment_3_network import SemanticNetwork
from visualization import setup_style, COLORS

# NOTE(review): presumably configures the shared matplotlib styling — confirm
# in the visualization module.
setup_style()

## 1. Build the Network

In [None]:
# Input files handed to SemanticNetwork (relative paths)
dialogue_path = '../data/EventTextDump.txt'
keywords_path = '../data/context_keywords.json'

# Construct the analysis object and run its stages in sequence
network = SemanticNetwork(dialogue_path, keywords_path)
network.load_data()
network.process_dialogues()
# NOTE(review): threshold=2 is presumably the minimum co-occurrence count
# required for an edge — confirm in SemanticNetwork.build_network
network.build_network(threshold=2)
network.detect_communities()

# Report graph size and the number of distinct community labels
size_report = [
    f"Nodes: {network.G.number_of_nodes()}",
    f"Edges: {network.G.number_of_edges()}",
    f"Communities: {len(set(network.communities.values()))}",
]
print("\n".join(size_report))

## 2. Network Metrics

In [None]:
# Compute the metrics once; the result is indexed by the keys used below
metrics = network.calculate_metrics()

# Headline figures, emitted as a single block of text
headline = [
    "Network Metrics:",
    f"  Density: {metrics['density']:.3f}",
    f"  Average Clustering: {metrics['avg_clustering']:.3f}",
    f"  Number of Communities: {metrics['num_communities']}",
    "\nTop Nodes by Betweenness Centrality:",
]
print("\n".join(headline))

# Only the first five entries of the betweenness mapping are shown
top_five = list(metrics['top_betweenness'].items())[:5]
for node, cent in top_five:
    print(f"  {node}: {cent:.3f}")

## 3. Visualize the Network

In [None]:
# Generate the network visualization.
# plot_network() is a method of the SemanticNetwork instance built earlier;
# its return value is unpacked here as a (fig, ax) pair.
fig, ax = network.plot_network()
plt.show()

## 4. Community Analysis

In [None]:
# Invert the node -> community mapping into community -> list of nodes
from collections import defaultdict

communities = defaultdict(list)
for member, label in network.communities.items():
    communities[label].append(member)

# Print each community in ascending order of its id
print("Community Membership:")
for comm_id, members in sorted(communities.items(), key=lambda entry: entry[0]):
    print(f"\nCommunity {comm_id} ({len(members)} members):")
    print(f"  {', '.join(members)}")

## 5. Co-occurrence Heatmap

In [None]:
# Generate the co-occurrence heatmap; fig and ax are rebound to this figure.
# NOTE(review): top_n=15 presumably limits the heatmap to 15 nodes — confirm
# in SemanticNetwork.plot_cooccurrence_heatmap.
fig, ax = network.plot_cooccurrence_heatmap(top_n=15)
plt.show()

## 6. Top Co-occurrence Pairs

In [None]:
# Build the pair table (requested with top_n=20) and render it as plain text
# without the index column
df_pairs = network.create_cooccurrence_table(top_n=20)
pairs_text = df_pairs.to_string(index=False)

print("Top 20 Co-occurrence Pairs:")
print(pairs_text)

## 7. Degree Distribution

In [None]:
# Degree of every node, keyed by node; `degrees` is read again by the
# entity/theme cell further down
degrees = dict(network.G.degree())
degree_values = list(degrees.values())

# One row, two panels: histogram on the left, ranking on the right
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
hist_ax, bar_ax = axes

# Left panel: how many nodes have each degree
hist_ax.hist(degree_values, bins=15, color=COLORS['story'], edgecolor='white')
hist_ax.set(
    xlabel='Degree (connections)',
    ylabel='Number of Nodes',
    title='Degree Distribution',
)

# Right panel: the ten highest-degree nodes, largest first
ranked = sorted(degrees.items(), key=lambda pair: pair[1], reverse=True)
top_degree = ranked[:10]
nodes, degs = zip(*top_degree)
positions = range(len(nodes))
bar_ax.barh(positions, degs, color=COLORS['combat'])
bar_ax.set_yticks(positions)
bar_ax.set_yticklabels(nodes)
bar_ax.invert_yaxis()  # flip the y-axis so the first (largest) bar is drawn at the top
bar_ax.set(xlabel='Degree', title='Top 10 Nodes by Degree')

plt.tight_layout()
plt.show()

## 8. Entity vs Theme Analysis

In [None]:
# Split the per-node degrees (from the degree-distribution cell) by the node
# type recorded in network.node_types
entity_degrees = {n: d for n, d in degrees.items() if network.node_types.get(n) == 'entity'}
theme_degrees = {n: d for n, d in degrees.items() if network.node_types.get(n) == 'theme'}

# Print the same three-line summary for each group. An empty group is reported
# explicitly: max() raises ValueError on an empty dict and np.mean([]) yields
# nan with a RuntimeWarning, so the statistics are skipped in that case.
groups = [("Entities", entity_degrees), ("Themes", theme_degrees)]
for position, (label, group_degrees) in enumerate(groups):
    separator = "\n" if position else ""  # blank line between groups, none before the first
    print(f"{separator}{label} in network: {len(group_degrees)}")
    if not group_degrees:
        print("  (no nodes of this type)")
        continue
    print(f"  Avg degree: {np.mean(list(group_degrees.values())):.1f}")
    print(f"  Top: {max(group_degrees.items(), key=lambda x: x[1])}")

## 9. Interpretation

The semantic network reveals:

1. **Central hub**: 小红帽 (Little Red Riding Hood) is the most connected node, bridging all thematic communities
2. **Community structure**: Clear separation between military, family, and horror discourse
3. **Bridge concepts**: Terms like 血 (blood) and 团长 (commander) connect otherwise separate clusters

---

**Next**: Proceed to `05_motif_tracking.ipynb` to explore how the "Red Hood" symbol evolves.