In [2]:
import igraph as ig


track_network_path = "../network-parser/track_graph.graphml"
artist_network_path = "../network-parser/artist_graph.graphml"

# read graphml files
track_network = ig.Graph.Read_GraphML(track_network_path)
artist_network = ig.Graph.Read_GraphML(artist_network_path)

# force convert to undirected
track_network.to_undirected()
artist_network.to_undirected()

# force convert to simple
track_network.simplify()
artist_network.simplify()

# confirm
print(track_network.is_simple())
print(artist_network.is_simple())



True
True


# Preliminary analysis

In [3]:
import numpy as np
import pandas as pd
from tqdm import tqdm

# calculate mean degree with progress bar
track_network_degree = []
artist_network_degree = []

for vertex in tqdm(track_network.vs, desc="Track Network Degree Calculation"):
    track_network_degree.append(track_network.degree(vertex))

for vertex in tqdm(artist_network.vs, desc="Artist Network Degree Calculation"):
    artist_network_degree.append(artist_network.degree(vertex))

print("Mean degree of track network: ", np.mean(track_network_degree))
print("Mean degree of artist network: ", np.mean(artist_network_degree))

# calculate mean clustering coefficient with progress bar
track_network_clustering_coefficient = []
artist_network_clustering_coefficient = []

for vertex in tqdm(track_network.vs, desc="Track Network Clustering Coefficient Calculation"):
    track_network_clustering_coefficient.append(track_network.transitivity_local_undirected(vertices=[vertex])[0])

for vertex in tqdm(artist_network.vs, desc="Artist Network Clustering Coefficient Calculation"):
    artist_network_clustering_coefficient.append(artist_network.transitivity_local_undirected(vertices=[vertex])[0])

print("Mean clustering coefficient of track network: ", np.mean(track_network_clustering_coefficient))
print("Mean clustering coefficient of artist network: ", np.mean(artist_network_clustering_coefficient))


Track Network Degree Calculation: 100%|██████████| 58558/58558 [00:00<00:00, 1431844.73it/s]
Artist Network Degree Calculation: 100%|██████████| 14820/14820 [00:00<00:00, 1463267.07it/s]


Mean degree of track network:  221.78520441271903
Mean degree of artist network:  195.472334682861


Track Network Clustering Coefficient Calculation: 100%|██████████| 58558/58558 [00:43<00:00, 1347.70it/s]
Artist Network Clustering Coefficient Calculation: 100%|██████████| 14820/14820 [00:07<00:00, 1975.44it/s]

Mean clustering coefficient of track network:  0.8465188127450948
Mean clustering coefficient of artist network:  0.8005243710916299





In [8]:
# visualize sample of both networks
import matplotlib.pyplot as plt

# fig, axs = plt.subplots(1, 2, figsize=(12, 6))

sample_size = 1000

track_network_vertices = np.random.choice(track_network.vs, sample_size, replace=False)
artist_network_vertices = np.random.choice(artist_network.vs, sample_size, replace=False)

track_network_edges = track_network.get_edgelist()
artist_network_edges = artist_network.get_edgelist()

track_network_subgraph = track_network.subgraph(track_network_vertices)
artist_network_subgraph = artist_network.subgraph(artist_network_vertices)


# Create a figure for the track network
track_fig, track_ax = plt.subplots()
track_layout = track_network_subgraph.layout_auto()
ig.plot(track_network_subgraph, vertex_size=1, target=track_ax, layout=track_layout)
track_ax.set_title("Track Network Sample")
plt.tight_layout()
plt.savefig("track_network_sample.png")
plt.close(track_fig)

# Create a figure for the artist network
artist_fig, artist_ax = plt.subplots()
artist_layout = artist_network_subgraph.layout_auto()
ig.plot(artist_network_subgraph, vertex_size=1, target=artist_ax, layout=artist_layout)
artist_ax.set_title("Artist Network Sample")
plt.tight_layout()
plt.savefig("artist_network_sample.png")
plt.close(artist_fig)



In [17]:
import matplotlib.pyplot as plt

# # of connected components
track_network_connected_components = track_network.components()
artist_network_connected_components = artist_network.components()

print("Number of connected components in track network: ", len(track_network_connected_components))
print("Number of connected components in artist network: ", len(artist_network_connected_components))

# size distribution of connected components
track_network_connected_components_size = []
artist_network_connected_components_size = []

for component in track_network_connected_components:
    track_network_connected_components_size.append(len(component))

for component in artist_network_connected_components:
    artist_network_connected_components_size.append(len(component))

# print the sizes of components
print("Sizes of connected components in track network: ", track_network_connected_components_size)
print("Sizes of connected components in artist network: ", artist_network_connected_components_size)

# save the largest connected component for later analysis
track_network_giant = track_network.subgraph(track_network_connected_components[np.argmax(track_network_connected_components_size)])
artist_network_giant = artist_network.subgraph(artist_network_connected_components[np.argmax(artist_network_connected_components_size)])

# print content of smmallest components
track_network_smallest_components = track_network.subgraph(track_network_connected_components[np.argmin(track_network_connected_components_size)])
artist_network_smallest_components = artist_network.subgraph(artist_network_connected_components[np.argmin(artist_network_connected_components_size)])

print("Number of vertices in track network smallest component: ", track_network_smallest_components.vcount())
print("Number of edges in track network smallest component: ", track_network_smallest_components.ecount())

print("Number of vertices in artist network smallest component: ", artist_network_smallest_components.vcount())
print("Number of edges in artist network smallest component: ", artist_network_smallest_components.ecount())

# print all attributes of the smallest components
print("Attributes of smallest components in track network: ", track_network_smallest_components.vs.attributes())
print("Attributes of smallest components in artist network: ", artist_network_smallest_components.vs.attributes())

# print labels of non-giant components
print("Labels of smallest components in track network: ", track_network_smallest_components.vs["id"])
print("Labels of smallest components in artist network: ", artist_network_smallest_components.vs["id"])





Number of connected components in track network:  29
Number of connected components in artist network:  6
Sizes of connected components in track network:  [57053, 111, 138, 21, 86, 236, 38, 183, 8, 50, 104, 21, 24, 86, 13, 153, 24, 15, 12, 39, 14, 39, 17, 23, 8, 13, 6, 18, 5]
Sizes of connected components in artist network:  [14763, 21, 6, 8, 8, 14]
Number of vertices in track network smallest component:  5
Number of edges in track network smallest component:  10
Number of vertices in artist network smallest component:  6
Number of edges in artist network smallest component:  15
Attributes of smallest components in track network:  ['id']
Attributes of smallest components in artist network:  ['id']
Labels of smallest components in track network:  ['spotify:track:3StCElHKaN6ASLf7Pymdum', 'spotify:track:1zRqRmZyE0bgXkSoVVaCwK', 'spotify:track:6viXsuUv5BEnvVYLFCk8os', 'spotify:track:7ETuZtnJJ1brMNiPpd2LFg', 'spotify:track:5mahfoOSoSDgkLRI5MMHj7']
Labels of smallest components in artist net

In [18]:
# calculate mean shortest path length for a random sample of 1000 vertices
track_network_shortest_path_length = []
artist_network_shortest_path_length = []

# sample 1000 vertices
track_network_vertices = np.random.choice(track_network_giant.vs, 1000, replace=False)
artist_network_vertices = np.random.choice(artist_network_giant.vs, 1000, replace=False)

for vertex in tqdm(track_network_vertices, desc="Track Network Shortest Path Calculation"):
    track_network_shortest_path_length.append(np.mean(track_network_giant.distances(vertex)[0]))

for vertex in tqdm(artist_network_vertices, desc="Artist Network Shortest Path Calculation"):
    artist_network_shortest_path_length.append(np.mean(artist_network_giant.distances(vertex)[0]))

print("Mean shortest path length of track network: ", np.mean(track_network_shortest_path_length))
print("Diameter of track network: ", np.max(track_network_shortest_path_length))

print("Mean shortest path length of artist network: ", np.mean(artist_network_shortest_path_length))
print("Diameter of artist network: ", np.max(artist_network_shortest_path_length))


Track Network Shortest Path Calculation: 100%|██████████| 1000/1000 [01:27<00:00, 11.48it/s]
Artist Network Shortest Path Calculation: 100%|██████████| 1000/1000 [00:13<00:00, 73.19it/s]

Mean shortest path length of track network:  2.9445954288118066
Diameter of track network:  4.535414439205651
Mean shortest path length of artist network:  2.5251248391248393
Diameter of artist network:  4.85815891079049





In [9]:
# Plot degree distribution
import matplotlib.pyplot as plt

# fig, axs = plt.subplots(1, 2, figsize=(12, 6))

# axs[0].hist(track_network_degree, bins=50, edgecolor='black')
# axs[0].set_title('Track Network Degree Distribution')
# axs[0].set_xlabel('Degree')
# axs[0].set_ylabel('Frequency')

# axs[1].hist(artist_network_degree, bins=50, edgecolor='black')
# axs[1].set_title('Artist Network Degree Distribution')
# axs[1].set_xlabel('Degree')
# axs[1].set_ylabel('Frequency')

# plt.show()

track_degree_fig, track_degree_ax = plt.subplots()
track_degree_ax.hist(track_network_degree, bins=50, edgecolor='black')
track_degree_ax.set_title('Track Network Degree Distribution')
track_degree_ax.set_xlabel('Degree')
track_degree_ax.set_ylabel('Frequency')
plt.savefig("track_network_degree_distribution.png")
plt.close(track_degree_fig)

artist_degree_fig, artist_degree_ax = plt.subplots()
artist_degree_ax.hist(artist_network_degree, bins=50, edgecolor='black')
artist_degree_ax.set_title('Artist Network Degree Distribution')
artist_degree_ax.set_xlabel('Degree')
artist_degree_ax.set_ylabel('Frequency')
plt.savefig("artist_network_degree_distribution.png")
plt.close(artist_degree_fig)




