In [7]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from networkx.algorithms import bipartite, community


In [8]:
# Load the node tables and the edge list, then keep only the edges that link
# a Pokémon to a high-rated episode.
DATA_DIR = '../../datasets/clean'  # folder holding the cleaned CSV exports
MIN_RATING = 8.5                   # an episode is "high-rated" at or above this value

pokemon_df = pd.read_csv(f'{DATA_DIR}/pokemon_nodes.csv')
episodes_df = pd.read_csv(f'{DATA_DIR}/episode_nodes.csv')
edge_df = pd.read_csv(f'{DATA_DIR}/edge_list.csv')

# Filter high-rated episodes (rating ≥ MIN_RATING)
high_rated_eps = episodes_df[episodes_df['rating'] >= MIN_RATING]
high_rated_ep_ids = set(high_rated_eps['Id'])
pokemon_ids = set(pokemon_df['Id'])

# Filter edge list for connections to high-rated episodes.
# An edge is accepted in either orientation: Pokémon -> episode or episode -> Pokémon.
pokemon_to_episode = (
    edge_df['Source'].isin(pokemon_ids) & edge_df['Target'].isin(high_rated_ep_ids)
)
episode_to_pokemon = (
    edge_df['Target'].isin(pokemon_ids) & edge_df['Source'].isin(high_rated_ep_ids)
)
filtered_edges = edge_df[pokemon_to_episode | episode_to_pokemon]


In [9]:
# Build the bipartite Pokémon–episode graph.
G = nx.Graph()
# Every Pokémon id is added, including those with no edge to a high-rated
# episode; such nodes remain in G with degree 0.
G.add_nodes_from(pokemon_ids, bipartite=0, type='pokemon')
G.add_nodes_from(high_rated_ep_ids, bipartite=1, type='episode')
# Pick the endpoint columns by name so that any additional column in the edge
# list (e.g. a weight or label) is not mistaken for an edge-attribute dict.
G.add_edges_from(filtered_edges[['Source', 'Target']].values)

print(f"Filtered Graph: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")


Filtered Graph: 931 nodes, 1079 edges


In [10]:
# --- Split node sets ---
# A node is a Pokémon when its 'type' attribute says so; every other node in G
# is placed in the episode set.
pokemon_nodes = set()
for name, attrs in G.nodes(data=True):
    if attrs['type'] == 'pokemon':
        pokemon_nodes.add(name)
episode_nodes = set(G).difference(pokemon_nodes)


def _average(scores):
    """Return the arithmetic mean of the values of a node -> score mapping."""
    return sum(scores.values()) / len(scores)


# --- Centralities (computed on the whole graph G) ---
deg_cent = nx.degree_centrality(G)
btw_cent = nx.betweenness_centrality(G)
clo_cent = nx.closeness_centrality(G)

# --- Average centrality values ---
avg_deg_cent = _average(deg_cent)
avg_btw_cent = _average(btw_cent)
avg_clo_cent = _average(clo_cent)

print(
    f"\nAverage Degree Centrality: {avg_deg_cent:.4f}",
    f"Average Betweenness Centrality: {avg_btw_cent:.4f}",
    f"Average Closeness Centrality: {avg_clo_cent:.4f}",
    sep="\n",
)

# --- Assortativity ---
assortativity = nx.degree_assortativity_coefficient(G)
print(f"\nAssortativity Coefficient: {assortativity:.4f}")

# --- Degree stats ---
degrees = [k for _, k in G.degree()]
mean_deg = sum(degrees) / len(degrees)
max_deg, min_deg = max(degrees), min(degrees)

print(
    f"\nGraph Degree Stats:",
    f"Mean Degree: {mean_deg:.2f}",
    f"Max Degree: {max_deg}",
    f"Min Degree: {min_deg}",
    f"Number of Nodes: {G.number_of_nodes()}",
    f"Number of Edges: {G.number_of_edges()}",
    sep="\n",
)




Average Degree Centrality: 0.0025
Average Betweenness Centrality: 0.0005
Average Closeness Centrality: 0.0697

Assortativity Coefficient: -0.4300

Graph Degree Stats:
Mean Degree: 2.32
Max Degree: 200
Min Degree: 0
Number of Nodes: 931
Number of Edges: 1079


In [11]:
# Project the bipartite graph onto the Pokémon node set.
pokemon_proj = bipartite.projected_graph(G, pokemon_nodes)

# Community detection via greedy modularity maximisation on the projection.
# NOTE(review): Pokémon with no edges in G stay isolated in the projection and
# presumably each end up as their own community, which would inflate the count
# printed below — confirm before interpreting it.
communities = [*community.greedy_modularity_communities(pokemon_proj)]
print(f"Found {len(communities)} communities")

# Optional: node -> community index lookup (index = position in `communities`)
community_map = {
    member: label
    for label, members in enumerate(communities)
    for member in members
}


Found 498 communities


In [12]:
# Extract the largest connected component of the original bipartite graph.
# `lcc` is the node set of the biggest component; `G_lcc` is an independent
# copy of the induced subgraph, so later edits to it do not touch G.
components = nx.connected_components(G)
lcc = max(components, key=len)
G_lcc = G.subgraph(lcc).copy()
print(f"Largest CC: {G_lcc.number_of_nodes()} nodes")


Largest CC: 437 nodes
