# Genres and communities

In [2]:
#!pip install networkx

In [3]:
import ast

import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
from networkx.algorithms.community import greedy_modularity_communities

In [None]:
# Load the dataset
movies_path = 'TMDB_scraped_features.csv'
movies_df = pd.read_csv(movies_path)


def _parse_cast(raw):
    """Convert one raw 'cast' cell into a Python object.

    String cells are parsed with ast.literal_eval, which accepts only Python
    literals (lists, tuples, strings, numbers, ...) and therefore cannot run
    code embedded in the CSV, unlike eval. Non-string cells (for example a
    missing value, which pandas reads as NaN) become an empty list.

    A string that is not a valid literal makes ast.literal_eval raise
    (e.g. ValueError or SyntaxError), so malformed rows fail loudly instead
    of being dropped silently.
    """
    if not isinstance(raw, str):
        return []
    return ast.literal_eval(raw)


# Convert the 'cast' column into a usable format (assumes each cell holds the
# textual form of a list of actor IDs/names)
movies_df['cast'] = movies_df['cast'].apply(_parse_cast)

In [None]:
# Build the actor co-appearance graph: an edge links two actors who are
# listed in the cast of the same movie.
G = nx.Graph()

for _, movie in movies_df.iterrows():
    members = movie['cast']
    for position, first_actor in enumerate(members):
        # Pair each cast entry only with the entries listed after it, so
        # each pair of cast positions is visited once per movie.
        for second_actor in members[position + 1:]:
            if G.has_edge(first_actor, second_actor):
                # Already linked: leave the existing edge and its 'movie'
                # attribute untouched.
                continue
            G.add_edge(first_actor, second_actor, movie=movie['title'])

In [None]:
# Detect communities
# greedy_modularity_communities yields one set of nodes per community; the
# position of each set in that sequence is used as the community ID.
community_sets = list(greedy_modularity_communities(G))

# Map every actor to the ID of the community that contains them.
communities = {node: idx for idx, community in enumerate(community_sets) for node in community}

# Create the community graph
H = nx.Graph()

# Add one node per community, keyed by community ID. Adding the actors
# themselves here would mix actor nodes with the community-ID nodes created
# by the edges below and inflate the node count of H.
for community_id, members in enumerate(community_sets):
    H.add_node(community_id, size=len(members))

# Add edges between communities if actors from different communities acted together
for actor1, actor2 in G.edges():
    community1 = communities[actor1]
    community2 = communities[actor2]
    if community1 != community2:
        # H is a simple Graph, so re-adding an existing edge is a no-op and
        # no has_edge check is needed.
        H.add_edge(community1, community2)

In [None]:
# Report node and edge counts, first for the actor graph and then for the
# community graph.
original_summary = f"Original Graph: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges."
print(original_summary)

community_summary = f"Community Graph: {H.number_of_nodes()} nodes, {H.number_of_edges()} edges."
print(community_summary)
