# Graph Creation

In [24]:
import networkx as nx
import pandas as pd

In [25]:
# Locations of the edge-list data, grouped by decade.

# 1970s: one general CSV file plus a directory holding one CSV per category
general70s_path = "Data/EdgeData/1970s/General/70s.csv"
categorical70s_path = "Data/EdgeData/1970s/Categorical"

# 1980s: same layout as the 1970s
general80s_path = "Data/EdgeData/1980s/General/80s.csv"
categorical80s_path = "Data/EdgeData/1980s/Categorical"

In [26]:
# The categories, kept as an alphabetically ordered list rather than a set.
# The per-category node lists are paired with this collection by position, and
# a set of strings iterates in a different order on every kernel start (hash
# randomisation). A fixed order makes node insertion order, and the category
# that wins for a node appearing in several category files, reproducible.
categories = ['actor', 'actress', 'archive_footage', 'archive_sound', 'cinematographer', 'composer',
              'director', 'editor', 'producer', 'production_designer', 'self', 'writer']

In [27]:
# Per-decade containers: each will receive one list of node ids per category
nodedata70s, nodedata80s = [], []

In [28]:
# add 70s nodes
# Empty the list first so that re-running this cell does not append a second
# copy of every category. nodedata70s is matched to `categories` by position
# when the nodes are added to the graph, so extra entries would break that step.
nodedata70s.clear()
for category in categories:
    # One edge list per category, e.g. <categorical70s_path>/actor70s.csv
    category70s_path = f"{categorical70s_path}/{category}70s.csv"

    category70s = pd.read_csv(category70s_path, sep=',', low_memory=False)

    # A node is any id that appears at either end of an edge
    nodes70s_set = set(category70s['Source']) | set(category70s['Target'])

    nodedata70s.append(list(nodes70s_set))

In [29]:
# add 80s nodes
# Empty the list first so that re-running this cell does not append a second
# copy of every category. nodedata80s is matched to `categories` by position
# when the nodes are added to the graph, so extra entries would break that step.
nodedata80s.clear()
for category in categories:
    # One edge list per category, e.g. <categorical80s_path>/actor80s.csv
    category80s_path = f"{categorical80s_path}/{category}80s.csv"

    category80s = pd.read_csv(category80s_path, sep=',', low_memory=False)

    # A node is any id that appears at either end of an edge
    nodes80s_set = set(category80s['Source']) | set(category80s['Target'])

    nodedata80s.append(list(nodes80s_set))

In [30]:
# Load the general edge list of each decade into a DataFrame
# (comma is already read_csv's default separator)
edges70s = pd.read_csv(general70s_path, low_memory=False)
edges80s = pd.read_csv(general80s_path, low_memory=False)

In [31]:
# Create one empty undirected graph per decade
G70s, G80s = nx.Graph(), nx.Graph()

In [32]:
# Add 70s nodes
# nodedata70s[i] holds the node ids of the i-th category. Materialise the
# category order once instead of rebuilding list(categories) for every node.
category_names = list(categories)
for i, nodes in enumerate(nodedata70s):
    for node in nodes:
        # A node listed under several categories keeps the last one written
        G70s.add_node(node, category=category_names[i])

In [33]:
# Add 80s nodes
# nodedata80s[i] holds the node ids of the i-th category. Materialise the
# category order once instead of rebuilding list(categories) for every node.
category_names = list(categories)
for i, nodes in enumerate(nodedata80s):
    for node in nodes:
        # A node listed under several categories keeps the last one written
        G80s.add_node(node, category=category_names[i])

In [34]:
# Add 70s edges: every row of the general edge table becomes one edge
G70s.add_edges_from(edges70s.to_numpy())

In [35]:
# Add 80s edges: every row of the general edge table becomes one edge
G80s.add_edges_from(edges80s.to_numpy())

In [36]:
# Treat both graphs as undirected.
# NOTE(review): G70s/G80s were created with nx.Graph(), which is already an
# undirected graph type, so this conversion looks redundant — confirm before removing.
G70s, G80s = nx.to_undirected(G70s), nx.to_undirected(G80s)

# Analysis
## Eigenvector Centrality

In [37]:
# Eigenvector centrality of every node in the 70s graph
centrality_scores = nx.eigenvector_centrality(G70s)

# Store each score on its node under the 'eigenvector_centrality' attribute
nx.set_node_attributes(G70s, values=centrality_scores, name='eigenvector_centrality')

In [38]:
# Print the eigenvector centrality attribute for a sample of 70s nodes.
# The previous loop only assigned the value to a variable, so nothing was ever
# displayed. Output is capped at the first 10 nodes to keep the cell readable.
for node, data in list(G70s.nodes(data=True))[:10]:
    eigenvector_centrality = data['eigenvector_centrality']
    print(f"Node {node}, Eigenvector Centrality: {eigenvector_centrality}")

In [39]:
# Eigenvector centrality of every node in the 80s graph
centrality_scores = nx.eigenvector_centrality(G80s)

# Store each score on its node under the 'eigenvector_centrality' attribute
nx.set_node_attributes(G80s, values=centrality_scores, name='eigenvector_centrality')

In [40]:
# Print the eigenvector centrality attribute for a sample of 80s nodes.
# The previous loop only assigned the value to a variable, so nothing was ever
# displayed. Output is capped at the first 10 nodes to keep the cell readable.
for node, data in list(G80s.nodes(data=True))[:10]:
    eigenvector_centrality = data['eigenvector_centrality']
    print(f"Node {node}, Eigenvector Centrality: {eigenvector_centrality}")

In [41]:
# Rank the 70s nodes by eigenvector centrality (highest first) and show the top 10
sorted_nodes = list(G70s.nodes(data=True))
sorted_nodes.sort(key=lambda entry: entry[1]['eigenvector_centrality'], reverse=True)
for node, attributes in sorted_nodes[:10]:
    print(f"Node {node}, attributes: {attributes}")

Node nm0473742, attributes: {'category': 'composer', 'eigenvector_centrality': 0.14117340737650347}
Node nm0005983, attributes: {'category': 'composer', 'eigenvector_centrality': 0.13928976177036276}
Node nm0701403, attributes: {'category': 'composer', 'eigenvector_centrality': 0.1242511477518488}
Node nm0474876, attributes: {'category': 'actor', 'eigenvector_centrality': 0.12142063438673219}
Node nm0004429, attributes: {'category': 'actor', 'eigenvector_centrality': 0.11763983313817046}
Node nm0004435, attributes: {'category': 'actor', 'eigenvector_centrality': 0.1093312625400444}
Node nm0659250, attributes: {'category': 'actress', 'eigenvector_centrality': 0.10040261939739122}
Node nm0006369, attributes: {'category': 'actor', 'eigenvector_centrality': 0.09948509652042664}
Node nm0004660, attributes: {'category': 'actor', 'eigenvector_centrality': 0.09830254524595704}
Node nm0006284, attributes: {'category': 'composer', 'eigenvector_centrality': 0.09582025560959898}


In [42]:
# Rank the 80s nodes by eigenvector centrality (highest first) and show the top 10
sorted_nodes = list(G80s.nodes(data=True))
sorted_nodes.sort(key=lambda entry: entry[1]['eigenvector_centrality'], reverse=True)
for node, attributes in sorted_nodes[:10]:
    print(f"Node {node}, attributes: {attributes}")

Node nm0007123, attributes: {'category': 'actor', 'eigenvector_centrality': 0.19065903679970633}
Node nm0006137, attributes: {'category': 'composer', 'eigenvector_centrality': 0.18619941067201495}
Node nm0482320, attributes: {'category': 'actor', 'eigenvector_centrality': 0.174151621079463}
Node nm0893449, attributes: {'category': 'actor', 'eigenvector_centrality': 0.15059342227119413}
Node nm0796105, attributes: {'category': 'composer', 'eigenvector_centrality': 0.1397729088658764}
Node nm1431061, attributes: {'category': 'writer', 'eigenvector_centrality': 0.11541627113806024}
Node nm0811794, attributes: {'category': 'actress', 'eigenvector_centrality': 0.11536267036368311}
Node nm0858128, attributes: {'category': 'actor', 'eigenvector_centrality': 0.10859339575019232}
Node nm0882219, attributes: {'category': 'actress', 'eigenvector_centrality': 0.10242987689990948}
Node nm0329727, attributes: {'category': 'actor', 'eigenvector_centrality': 0.10014466639365688}


# Betweenness Centrality

In [43]:
# Betweenness centrality of every node in the 70s graph
betweenness = nx.betweenness_centrality(G70s)

# Store each score on its node under the 'betweenness_centrality' attribute
nx.set_node_attributes(G70s, values=betweenness, name='betweenness_centrality')

In [44]:
# Show the betweenness centrality stored on every 70s node
for node, data in G70s.nodes(data=True):
    score = data['betweenness_centrality']
    print(f"Node {node}, Betweenness Centrality: {score}")

Node nm2988676, Betweenness Centrality: 0.0
Node nm11634453, Betweenness Centrality: 0.0
Node nm0361916, Betweenness Centrality: 0.0
Node nm0927595, Betweenness Centrality: 0.0
Node nm0800436, Betweenness Centrality: 0.0
Node nm0217507, Betweenness Centrality: 0.0
Node nm1241060, Betweenness Centrality: 0.0
Node nm0772994, Betweenness Centrality: 0.00047142833548134374
Node nm0677758, Betweenness Centrality: 0.0
Node nm0534120, Betweenness Centrality: 0.0
Node nm0513375, Betweenness Centrality: 2.2528572581013383e-05
Node nm0703067, Betweenness Centrality: 0.0
Node nm0144594, Betweenness Centrality: 0.0
Node nm0209137, Betweenness Centrality: 0.0
Node nm0102925, Betweenness Centrality: 0.0
Node nm3194806, Betweenness Centrality: 0.0
Node nm6322259, Betweenness Centrality: 0.0
Node nm1859529, Betweenness Centrality: 0.0
Node nm0435062, Betweenness Centrality: 0.0
Node nm0232766, Betweenness Centrality: 0.0
Node nm0089280, Betweenness Centrality: 0.0
Node nm10955713, Betweenness Centrali

In [None]:
# Betweenness centrality of every node in the 80s graph
betweenness = nx.betweenness_centrality(G80s)

# Store each score on its node under the 'betweenness_centrality' attribute
nx.set_node_attributes(G80s, values=betweenness, name='betweenness_centrality')

In [None]:
# Show the betweenness centrality stored on every 80s node
for node, data in G80s.nodes(data=True):
    score = data['betweenness_centrality']
    print(f"Node {node}, Betweenness Centrality: {score}")

# Closeness Centrality

In [None]:
# Calculate 70s closeness centrality.
# This must call nx.closeness_centrality: the cell previously called
# nx.betweenness_centrality, so the 'closeness_centrality' attribute silently
# held betweenness values.
closeness = nx.closeness_centrality(G70s)

# Add closeness centrality as an attribute to nodes
nx.set_node_attributes(G70s, closeness, 'closeness_centrality')

In [None]:
# Print the 70s closeness centrality attribute of each node
for node, data in G70s.nodes(data=True):
    # The score is stored under 'closeness_centrality'; no 'betweenness' key is
    # ever set on the nodes, so reading that key raised KeyError.
    print(f"Node {node}, Closeness Centrality: {data['closeness_centrality']}")

In [None]:
# Calculate 80s closeness centrality.
# This must call nx.closeness_centrality: the cell previously called
# nx.betweenness_centrality, so the 'closeness_centrality' attribute silently
# held betweenness values.
closeness = nx.closeness_centrality(G80s)

# Add closeness centrality as an attribute to nodes
nx.set_node_attributes(G80s, closeness, 'closeness_centrality')

In [None]:
# Show the closeness centrality stored on every 80s node
for node, data in G80s.nodes(data=True):
    closeness_value = data['closeness_centrality']
    print(f"Node {node}, Closeness Centrality: {closeness_value}")

# Louvain Community Detection

In [None]:
import community

In [None]:
# Perform Louvain community detection on 70s.
# The algorithm is randomised, so a fixed seed is passed to get the same
# partition on every run.
partition = community.best_partition(G70s, random_state=42)

# Add community assignment as an attribute to each node
# (same helper the centrality cells use to store their scores)
nx.set_node_attributes(G70s, partition, 'community')

In [None]:
# Show the community id assigned to every 70s node
for node, data in G70s.nodes(data=True):
    community_label = data['community']
    print(f"Node {node}, Community: {community_label}")

In [None]:
# Perform Louvain community detection on 80s.
# The algorithm is randomised, so a fixed seed is passed to get the same
# partition on every run.
partition = community.best_partition(G80s, random_state=42)

# Add community assignment as an attribute to each node
# (same helper the centrality cells use to store their scores)
nx.set_node_attributes(G80s, partition, 'community')

In [None]:
# Show the community id assigned to every 80s node
for node, data in G80s.nodes(data=True):
    community_label = data['community']
    print(f"Node {node}, Community: {community_label}")

# Convert 70s and 80s graphs to JSON

In [None]:
from networkx.readwrite import json_graph
import json

In [None]:
# Export the 70s graph: node-link structure -> indented JSON file on disk
filename = "Data/Graphs/G70s.json"
data = json_graph.node_link_data(G70s)

with open(filename, "w") as out_file:
    json.dump(data, out_file, indent=4)

print("Graph converted and saved as", filename)

In [None]:
# Export the 80s graph: node-link structure -> indented JSON file on disk
filename = "Data/Graphs/G80s.json"
data = json_graph.node_link_data(G80s)

with open(filename, "w") as out_file:
    json.dump(data, out_file, indent=4)

print("Graph converted and saved as", filename)