In [None]:
class GraphNode:
    """Payload object stored on a graph node for one version of a package.

    Attributes:
        index: Position of the node inside the graph. Starts out as ``None``
            and is expected to be assigned by the caller after insertion.
        name: Package name.
        version: Version string of the package.
        timestamp: Timestamp string associated with this version.
    """

    def __init__(self, name: str, version: str, timestamp: str):
        # The graph index is not known until the node has been added to a graph.
        self.index = None
        self.name, self.version, self.timestamp = name, version, timestamp

In [None]:
import retworkx as rx

In [None]:
# Directed graph that the following cells fill with nodes and edges.
# multigraph=False requests a graph that is not a multigraph, i.e. no parallel
# edges between the same ordered pair of nodes (see the retworkx PyDiGraph docs).
graph = rx.PyDiGraph(multigraph=False)

In [None]:
import json

# Parsed JSON export. Judging by how the later cells read it, each value is a
# record with at least a 'name' and a 'versions' entry.
input_data: dict[str, dict[str, dict]]
# Pin the encoding: JSON interchange text is UTF-8, whereas open() without an
# explicit encoding falls back to the platform default, which is not UTF-8 on
# every system and can corrupt or reject non-ASCII content.
with open('../../data/output/neo4j_data.json', 'r', encoding='utf-8') as file:
    input_data = json.load(file)

In [None]:
# Lookup table from the "<package name>-<version>" string id to the node index
# returned by graph.add_node for that package version.
string_id_to_index: dict[str, int] = {}

for package in input_data.values():
    for version_name, version_data in package['versions'].items():
        # One node per (package, version) pair, carrying that version's timestamp.
        node = GraphNode(package['name'], version_name, version_data['timestamp'])
        node_index = graph.add_node(node)
        string_id_to_index[f"{package['name']}-{version_name}"] = node_index

# Second pass: record on every payload the index it has inside the graph.
for index in graph.node_indices():
    payload = graph[index]
    payload.index = index

In [None]:
from semantic_version import SimpleSpec, Version

# Names of all packages present in the input; dependencies on anything else
# cannot be resolved to nodes and are skipped.
keys = set(input_data.keys())

# dependency package key -> [(node index, parsed Version), ...]
# Re-created on every run of this cell so it never outlives a reload of the data.
_candidates_by_package: dict[str, list] = {}


def _candidate_versions(dependency_name):
    """Return ``(node_index, Version)`` pairs for the parseable versions of a package.

    The version strings of a package are parsed once and cached, instead of
    being re-parsed for every dependent version that references the package.
    Versions that are not valid semver are dropped, since they can never be
    matched against a ``SimpleSpec``.
    """
    cached = _candidates_by_package.get(dependency_name)
    if cached is None:
        dependency = input_data[dependency_name]
        cached = []
        for dependency_version in dependency['versions']:
            try:
                semver_version = Version(dependency_version)
            except ValueError:
                continue
            # Nodes were registered under the package's 'name' field, so build
            # the id from that field rather than from the dict key.
            node_id = f"{dependency['name']}-{dependency_version}"
            cached.append((string_id_to_index[node_id], semver_version))
        _candidates_by_package[dependency_name] = cached
    return cached


# Add an edge from every package version to each known version of each of its
# dependencies that satisfies the declared version constraint.
for index, pack in enumerate(input_data.values()):

    for version_name, version_data in pack['versions'].items():
        # Same id format (and same 'name' field) as used when the nodes were added.
        dependent_index = string_id_to_index[f"{pack['name']}-{version_name}"]

        for dependency_name, dependency_version_constraint in version_data['dependencies'].items():
            if dependency_name not in keys:
                # Dependency is not part of the data set: no node to link to.
                continue

            try:
                spec = SimpleSpec(dependency_version_constraint)
            except ValueError:
                # Ignore dependencies with non-standard formats
                continue

            for dependency_index, semver_version in _candidate_versions(dependency_name):
                if spec.match(semver_version):
                    graph.add_edge(dependent_index, dependency_index, None)

    # Coarse progress report, once every 10,000 packages.
    if index % 10_000 == 0:
        print((index / len(input_data)) * 100, "% done!")

In [None]:
# Report the graph size. num_nodes()/num_edges() return the counts directly,
# so no list of every node payload / edge weight is built just to measure it.
print("Nodes: ", graph.num_nodes())
print("Edges: ", graph.num_edges())

In [None]:
# Peek at the package names carried by the first 100 nodes.
node_names = [node.name for node in graph.nodes()]
print(node_names[:100])

In [None]:
# Betweenness centrality computed by retworkx over the whole graph.
# NOTE(review): presumably expensive on a large graph — confirm the runtime
# before including this cell in a full Run All.
between = rx.betweenness_centrality(graph)
# Bare last expression so the notebook displays the result.
between

In [None]:
# WARNING: THIS WILL NOT WORK ON BIG GRAPHS!
# Only run this cell on a small graph: it hands the entire graph to matplotlib
# in a single mpl_draw call.
import matplotlib.pyplot as plt
from retworkx.visualization import mpl_draw

# Draw the whole graph with retworkx's matplotlib helper.
mpl_draw(graph)
# Redraw the current matplotlib figure.
plt.draw()