# Graph Creation

In [1]:
import networkx as nx
import pandas as pd

In [2]:
# General paths

# 70s
general70s_path = "Data/EdgeData/1970s/General/70s.csv"

# 80s
general80s_path = "Data/EdgeData/1980s/General/80s.csv"

# categorical paths

# 70s
categorical70s_path = "Data/EdgeData/1970s/Categorical"

# 80s
categorical80s_path = "Data/EdgeData/1980s/Categorical"

In [3]:
# A set of categories
categories = {'cinematographer', 'archive_footage', 'self', 'composer', 'writer', 'actress', 'director', 'editor',
              'producer', 'archive_sound', 'actor', 'production_designer'}

In [4]:
# Nodes data initialisation
nodedata70s = []
nodedata80s = []

In [5]:
# add 70s nodes
for category in categories:
    category70s_path = categorical70s_path + "/{}70s.csv".format(category)

    category70s = pd.read_csv(category70s_path, sep=',', low_memory=False)

    nodes70s_set = set(category70s['Source'])
    nodes70s_set = nodes70s_set.union(set(category70s['Target']))

    nodedata70s.append(list(nodes70s_set))

In [6]:
# add 80s nodes
for category in categories:
    category80s_path = categorical80s_path + "/{}80s.csv".format(category)

    category80s = pd.read_csv(category80s_path, sep=',', low_memory=False)

    nodes80s_set = set(category80s['Source'])

    nodes80s_set = nodes80s_set.union(set(category80s['Target']))

    nodedata80s.append(list(nodes80s_set))

In [7]:
# Edge data initialisation
edges70s = pd.read_csv(general70s_path, sep=',', low_memory=False)
edges80s = pd.read_csv(general80s_path, sep=',', low_memory=False)

In [8]:
# Initialize graph
G70s= nx.Graph()
G80s= nx.Graph()

In [9]:
# Add 70s nodes
for i in range(len(nodedata70s)):
    for node in nodedata70s[i]:
        G70s.add_node(node, category=list(categories)[i])

In [10]:
# Add 80s nodes
for i in range(len(nodedata80s)):
    for node in nodedata80s[i]:
        G80s.add_node(node, category=list(categories)[i])

In [11]:
# Add 70s edges
G70s.add_edges_from(edges70s.values)

In [12]:
# Add 80s edges
G80s.add_edges_from(edges80s.values)

In [13]:
# Graph is undirected
G70s = nx.to_undirected(G70s)
G80s = nx.to_undirected(G80s)

# Analysis
## Eigenvector Centrality

In [85]:
# Calculate 70s eigenvector centrality
centrality_scores = nx.eigenvector_centrality(G70s)

# Add eigenvector centrality as an attribute to nodes
nx.set_node_attributes(G70s, centrality_scores, 'eigenvector_centrality')

In [86]:
# Print the eigenvector centrality attribute for each node
for node, data in G70s.nodes(data=True):
    eigenvector_centrality = data['eigenvector_centrality']

In [87]:
# Calculate 80s eigenvector centrality
centrality_scores = nx.eigenvector_centrality(G80s)

# Add eigenvector centrality as an attribute to nodes
nx.set_node_attributes(G80s, centrality_scores, 'eigenvector_centrality')

In [88]:
# Print the eigenvector centrality attribute for each node
for node, data in G80s.nodes(data=True):
    eigenvector_centrality = data['eigenvector_centrality']

In [89]:
# Get the top 70s node with the highest eigenvector centrality
sorted_nodes = sorted(G70s.nodes(data=True), key=lambda x: x[1]['eigenvector_centrality'], reverse=True)
for node, attributes in sorted_nodes[:1]:
    print(f"Node {node}, attributes: {attributes}")

Node nm0473742, attributes: {'category': 'composer', 'eigenvector_centrality': 0.14117340737650352, 'betweenness_centrality': 0.002317191412883683, 'closeness_centrality': 0.002317191412883683, 'community': 3478}


In [90]:
# Get the top 80s node with the highest eigenvector centrality
sorted_nodes = sorted(G80s.nodes(data=True), key=lambda x: x[1]['eigenvector_centrality'], reverse=True)
for node, attributes in sorted_nodes[:1]:
    print(f"Node {node}, attributes: {attributes}")

Node nm0007123, attributes: {'category': 'actor', 'eigenvector_centrality': 0.19065903679970617, 'betweenness_centrality': 0.004268091861912812, 'community': 4833}


# Betweeness Centrality

In [91]:
# Calculate 70s betweenness centrality
betweenness = nx.betweenness_centrality(G70s)

# Add betweenness centrality as an attribute to nodes
nx.set_node_attributes(G70s, betweenness, 'betweenness_centrality')

KeyboardInterrupt: 

In [96]:
# Get the top 70s node with the highest betweenness centrality
sorted_nodes = sorted(G70s.nodes(data=True), key=lambda x: x[1]['betweenness_centrality'], reverse=True)
for node, attributes in sorted_nodes[:1]:
    print(f"Node {node}, attributes: {attributes}")

Node nm0006762, attributes: {'category': 'actor', 'eigenvector_centrality': 0.006662391537414313, 'betweenness_centrality': 0.09259497671090378, 'closeness_centrality': 0.09259497671090378, 'community': 3478}


In [93]:
# Calculate 80s betweenness centrality
betweenness = nx.betweenness_centrality(G80s)

# Add betweenness centrality as an attribute to nodes
nx.set_node_attributes(G80s, betweenness, 'betweenness_centrality')

KeyboardInterrupt: 

In [97]:
# Get the top 80s node with the highest betweenness centrality
sorted_nodes = sorted(G80s.nodes(data=True), key=lambda x: x[1]['betweenness_centrality'], reverse=True)
for node, attributes in sorted_nodes[:1]:
    print(f"Node {node}, attributes: {attributes}")

Node nm0000636, attributes: {'category': 'writer', 'eigenvector_centrality': 0.0008252645182580747, 'betweenness_centrality': 0.06989936135578094, 'community': 4833}


# Closeness Centrality

In [95]:
# Calculate 70s closeness centrality
closeness = nx.betweenness_centrality(G70s)

# Add closeness centrality as an attribute to nodes
nx.set_node_attributes(G70s, closeness, 'closeness_centrality')

KeyboardInterrupt: 

In [99]:
# Get the top 70s node with the highest closeness centrality
sorted_nodes = sorted(G70s.nodes(data=True), key=lambda x: x[1]['closeness_centrality'], reverse=True)
for node, attributes in sorted_nodes[:1]:
    print(f"Node {node}, attributes: {attributes}")

Node nm0006762, attributes: {'category': 'actor', 'eigenvector_centrality': 0.006662391537414313, 'betweenness_centrality': 0.09259497671090378, 'closeness_centrality': 0.09259497671090378, 'community': 3478}


In [None]:
# Calculate 80s closeness centrality
closeness = nx.betweenness_centrality(G80s)

# Add closeness centrality as an attribute to nodes
nx.set_node_attributes(G80s, closeness, 'closeness_centrality')

In [101]:
# Get the top 80s node with the highest closeness centrality
sorted_nodes = sorted(G80s.nodes(data=True), key=lambda x: x[1]['closeness_centrality'], reverse=True)
for node, attributes in sorted_nodes[:1]:
    print(f"Node {node}, attributes: {attributes}")

KeyError: 'closeness_centrality'

# Louvain Community Detection

In [102]:
# Perform Louvain Community Detection on 70s
partitions = nx.community.louvain_partitions(G70s, resolution=0)

# Add community assignment as an attribute to each node
community_id = 0
for partition in partitions:
    for part in partition:
            for node in list(part):
                G70s.nodes[node]['community'] = community_id
            community_id += 1

KeyboardInterrupt: 

In [82]:
# Get top 70s node with the highest community number
sorted_nodes = sorted(G70s.nodes(data=True), key=lambda x: x[1]['community'], reverse=True)
for node, attributes in sorted_nodes[:1]:
    print(f"Node {node}, attributes: {attributes}")

Node nm0211132, attributes: {'category': 'editor', 'eigenvector_centrality': 2.0179539410779127e-28, 'betweenness_centrality': 0.0, 'closeness_centrality': 0.0, 'community': 3751}


In [83]:
# Perform Louvain Community Detection on 80s
partitions = nx.community.louvain_partitions(G80s, resolution=0)

# Add community assignment as an attribute to each node
community_id = 0
for partition in partitions:
    for part in partition:
        for node in list(part):
            G80s.nodes[node]['community'] = community_id
        community_id += 1

In [84]:
# Get top 80s node with the highest community number
sorted_nodes = sorted(G80s.nodes(data=True), key=lambda x: x[1]['community'], reverse=True)
for node, attributes in sorted_nodes[:1]:
    print(f"Node {node}, attributes: {attributes}")

Node nm1190140, attributes: {'category': 'writer', 'eigenvector_centrality': 1.0924588980201526e-34, 'betweenness_centrality': 0.0, 'community': 5219}


# Convert 70s and 80s graphs to json

In [26]:
from networkx.readwrite import json_graph
import json

In [27]:
# Convert the 70s graph to a JSON-compatible data structure
data = json_graph.node_link_data(G70s)

# Save the data to a JSON file
filename = "Data/Graphs/G70s.json"
with open(filename, "w") as f:
    json.dump(data, f, indent=4)

print("Graph converted and saved as", filename)

Graph converted and saved as Data/Graphs/G70s.json


In [28]:
# Convert the 80s graph to a JSON-compatible data structure
data = json_graph.node_link_data(G80s)

# Save the data to a JSON file
filename = "Data/Graphs/G80s.json"
with open(filename, "w") as f:
    json.dump(data, f, indent=4)

print("Graph converted and saved as", filename)

Graph converted and saved as Data/Graphs/G80s.json
