# Graph Creation

In [18]:
import networkx as nx
import pandas as pd

In [19]:
# General paths

# 70s
general70s_path = "Data/EdgeData/1970s/General/70s.csv"

# 80s
general80s_path = "Data/EdgeData/1980s/General/80s.csv"

# categorical paths

# 70s
categorical70s_path = "Data/EdgeData/1970s/Categorical"

# 80s
categorical80s_path = "Data/EdgeData/1980s/Categorical"

In [20]:
# A set of categories
categories = ['writer', 'composer', 'editor', 'actor', 'self', 'archive_sound', 'producer', 'actress', 'production_designer', 'cinematographer', 'director', 'archive_footage']

In [40]:
# Nodes data initialisation
nodedata70s = []
nodedata80s = []

In [41]:
# add 70s nodes
for category in categories:
    category70s_path = categorical70s_path + "/{}70s.csv".format(category)

    category70s = pd.read_csv(category70s_path, sep=',', low_memory=False)

    nodes70s_set = set(category70s['Source'])
    nodes70s_set = nodes70s_set.union(set(category70s['Target']))

    nodedata70s.append(list(nodes70s_set))

In [42]:
# add 80s nodes
for category in categories:
    category80s_path = categorical80s_path + "/{}80s.csv".format(category)

    category80s = pd.read_csv(category80s_path, sep=',', low_memory=False)

    nodes80s_set = set(category80s['Source'])
    nodes80s_set = nodes80s_set.union(set(category80s['Target']))

    nodedata80s.append(list(nodes80s_set))

In [43]:
# Edge data initialisation
edges70s = pd.read_csv(general70s_path, sep=',', low_memory=False)
edges80s = pd.read_csv(general80s_path, sep=',', low_memory=False)

In [44]:
# Initialize graph
G70s= nx.Graph()
G80s= nx.Graph()

In [45]:
# Add 70s nodes
print(len(nodedata70s))
print(len(categories))
for i in range(len(nodedata70s)):
    for node in nodedata70s[i]:
        G70s.add_node(node, category=categories[i])

12
12


In [46]:
# Add 80s nodes
print(len(nodedata80s))
print(len(categories))
for i in range(len(nodedata80s)):
    for node in nodedata80s[i]:
        G80s.add_node(node, category=categories[i])

12
12


In [47]:
# Add 70s edges
G70s.add_edges_from(edges70s.values)

In [48]:
# Add 80s edges
G80s.add_edges_from(edges80s.values)

In [49]:
# Graph is undirected
G70s = nx.to_undirected(G70s)
G80s = nx.to_undirected(G80s)

# Analysis
## Eigenvector Centrality

In [50]:
# Calculate 70s eigenvector centrality
centrality_scores = nx.eigenvector_centrality(G70s)

# Add eigenvector centrality as an attribute to nodes
nx.set_node_attributes(G70s, centrality_scores, 'eigenvector_centrality')

In [51]:
# Print the eigenvector centrality attribute for each node
for node, data in G70s.nodes(data=True):
    eigenvector_centrality = data['eigenvector_centrality']

In [52]:
# Calculate 80s eigenvector centrality
centrality_scores = nx.eigenvector_centrality(G80s)

# Add eigenvector centrality as an attribute to nodes
nx.set_node_attributes(G80s, centrality_scores, 'eigenvector_centrality')

In [53]:
# Print the eigenvector centrality attribute for each node
for node, data in G80s.nodes(data=True):
    eigenvector_centrality = data['eigenvector_centrality']

In [54]:
# Get the top 70s node with the highest eigenvector centrality
sorted_nodes = sorted(G70s.nodes(data=True), key=lambda x: x[1]['eigenvector_centrality'], reverse=True)
for node, attributes in sorted_nodes[:1]:
    print(f"Node {node}, attributes: {attributes}")

Node nm0473742, attributes: {'category': 'composer', 'eigenvector_centrality': 0.1411734073765034}


In [55]:
# Get the top 80s node with the highest eigenvector centrality
sorted_nodes = sorted(G80s.nodes(data=True), key=lambda x: x[1]['eigenvector_centrality'], reverse=True)
for node, attributes in sorted_nodes[:1]:
    print(f"Node {node}, attributes: {attributes}")

Node nm0007123, attributes: {'category': 'actor', 'eigenvector_centrality': 0.19065903679970608}


# Betweeness Centrality

In [60]:
# Calculate 70s betweenness centrality
betweenness = nx.betweenness_centrality(G70s)

# Add betweenness centrality as an attribute to nodes
nx.set_node_attributes(G70s, betweenness, 'betweenness_centrality')

Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "C:\Users\niels\PycharmProjects\Webscience\venv\lib\site-packages\IPython\core\interactiveshell.py", line 3508, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "C:\Users\niels\AppData\Local\Temp\ipykernel_2036\2206203801.py", line 2, in <module>
    betweenness = nx.betweenness_centrality(G70s)
  File "C:\Users\niels\PycharmProjects\Webscience\venv\lib\site-packages\networkx\classes\backends.py", line 148, in wrapper
    return func(*args, **kwds)
  File "C:\Users\niels\PycharmProjects\Webscience\venv\lib\site-packages\networkx\utils\decorators.py", line 766, in func
    return argmap._lazy_compile(__wrapper)(*args, **kwargs)
  File "<class 'networkx.utils.decorators.argmap'> compilation 16", line 4, in argmap_betweenness_centrality_13
    import inspect
  File "C:\Users\niels\PycharmProjects\Webscience\venv\lib\site-packages\networkx\algorithms\centrality\betweenness.py", line 131, in betweenness_centrality
    S, P, s

In [None]:
# Get the top 70s node with the highest betweenness centrality
sorted_nodes = sorted(G70s.nodes(data=True), key=lambda x: x[1]['betweenness_centrality'], reverse=True)
for node, attributes in sorted_nodes[:1]:
    print(f"Node {node}, attributes: {attributes}")

In [None]:
# Calculate 80s betweenness centrality
betweenness = nx.betweenness_centrality(G80s)

# Add betweenness centrality as an attribute to nodes
nx.set_node_attributes(G80s, betweenness, 'betweenness_centrality')

In [None]:
# Get the top 80s node with the highest betweenness centrality
sorted_nodes = sorted(G80s.nodes(data=True), key=lambda x: x[1]['betweenness_centrality'], reverse=True)
for node, attributes in sorted_nodes[:1]:
    print(f"Node {node}, attributes: {attributes}")

# Closeness Centrality

In [None]:
# Calculate 70s closeness centrality
closeness = nx.betweenness_centrality(G70s)

# Add closeness centrality as an attribute to nodes
nx.set_node_attributes(G70s, closeness, 'closeness_centrality')

In [None]:
# Get the top 70s node with the highest closeness centrality
sorted_nodes = sorted(G70s.nodes(data=True), key=lambda x: x[1]['closeness_centrality'], reverse=True)
for node, attributes in sorted_nodes[:1]:
    print(f"Node {node}, attributes: {attributes}")

In [None]:
# Calculate 80s closeness centrality
closeness = nx.betweenness_centrality(G80s)

# Add closeness centrality as an attribute to nodes
nx.set_node_attributes(G80s, closeness, 'closeness_centrality')

In [None]:
# Get the top 80s node with the highest closeness centrality
sorted_nodes = sorted(G80s.nodes(data=True), key=lambda x: x[1]['closeness_centrality'], reverse=True)
for node, attributes in sorted_nodes[:1]:
    print(f"Node {node}, attributes: {attributes}")

# Louvain Community Detection

In [61]:
# Perform Louvain Community Detection on 70s
partitions = nx.community.louvain_partitions(G70s, resolution=1)

# Add community assignment as an attribute to each node
community_id = 0
for partition in partitions:
    for part in partition:
            for node in list(part):
                G70s.nodes[node]['community'] = community_id
            community_id += 1

In [62]:
# Get top 70s node with the highest community number
sorted_nodes = sorted(G70s.nodes(data=True), key=lambda x: x[1]['community'], reverse=True)
for node, attributes in sorted_nodes[:1]:
    print(f"Node {node}, attributes: {attributes}")

Node nm0016252, attributes: {'category': 'composer', 'eigenvector_centrality': 5.042802581544496e-15, 'community': 4114}


In [63]:
# Perform Louvain Community Detection on 80s
partitions = nx.community.louvain_partitions(G80s, resolution=1)

# Add community assignment as an attribute to each node
community_id = 0
for partition in partitions:
    for part in partition:
        for node in list(part):
            G80s.nodes[node]['community'] = community_id
        community_id += 1

In [64]:
# Get top 80s node with the highest community number
sorted_nodes = sorted(G80s.nodes(data=True), key=lambda x: x[1]['community'], reverse=True)
for node, attributes in sorted_nodes[:1]:
    print(f"Node {node}, attributes: {attributes}")

Node nm0166044, attributes: {'category': 'actor', 'eigenvector_centrality': 8.173589181698056e-31, 'community': 5576}


# Convert 70s and 80s graphs to json

In [None]:
from networkx.readwrite import json_graph
import json

In [None]:
# Convert the 70s graph to a JSON-compatible data structure
data = json_graph.node_link_data(G70s)

# Save the data to a JSON file
filename = "Data/Graphs/G70s.json"
with open(filename, "w") as f:
    json.dump(data, f, indent=4)

print("Graph converted and saved as", filename)

In [None]:
# Convert the 80s graph to a JSON-compatible data structure
data = json_graph.node_link_data(G80s)

# Save the data to a JSON file
filename = "Data/Graphs/G80s.json"
with open(filename, "w") as f:
    json.dump(data, f, indent=4)

print("Graph converted and saved as", filename)