In [None]:
import os
from raphtory import Graph

def load_graphs_lazy(graph_paths):
    """Lazily yield one item per entry of ``graph_paths``.

    For every path that is an existing file, a message is printed and the
    result of ``Graph.load_from_file(path)`` is yielded. For every other
    path a message is printed and ``None`` is yielded, so the number of
    yielded items always equals the number of paths.
    """
    for path in graph_paths:
        # Guard clause: report missing files and emit a placeholder.
        if not os.path.isfile(path):
            print(f"File does not exist: {path}")
            yield None
            continue

        print(f"Loading graph from: {path}")
        yield Graph.load_from_file(path)

# Candidate graph files Graph_1 ... Graph_24 (range(1, 25) excludes 25).
graph_paths = [f"C:/ms_bincode/Graph_{i}" for i in range(1, 25)]

# Create the generator only; load_graphs_lazy does not touch any file until
# the generator is iterated.
graphs_lazy = load_graphs_lazy(graph_paths)

In [None]:
from raphtory import Graph
import numpy as np
import pickle

"""
Conversion of multiple graph bincode files into adjacency matrices
"""

def load_graphs_lazy(graph_paths):
    """Generator that loads graphs one at a time, on demand.

    Yields the graph returned by ``Graph.load_from_file`` for each path that
    is an existing file, and ``None`` for each path that is not. A status
    line is printed for every path before its item is yielded.

    NOTE: this function uses ``os``, which the imports at the top of this
    cell do not bring in; it must already be imported in the kernel.
    """
    for path in graph_paths:
        found = os.path.isfile(path)
        if found:
            print(f"Loading graph from: {path}")
        else:
            print(f"File does not exist: {path}")
        # Only hit the disk when the file is actually there.
        yield Graph.load_from_file(path) if found else None

def transfor_into_adjacency_matrix(graph):
    """Build a dense 0/1 adjacency matrix for ``graph``.

    Rows and columns follow the order of ``graph.vertices.name``. Entry
    ``[s, d]`` is set to 1 for every edge in ``graph.edges`` whose source is
    vertex ``s`` and whose destination is vertex ``d``; all other entries
    stay 0. Returns a square ``numpy`` integer array.
    """
    names = list(graph.vertices.name)
    # name -> row/column position
    node_index = dict(zip(names, range(len(names))))

    size = len(node_index)
    adjacency_matrix = np.zeros((size, size), dtype=int)

    for edge in graph.edges:
        adjacency_matrix[node_index[edge.src.name], node_index[edge.dst.name]] = 1

    return adjacency_matrix

# Graph files to convert; range(4, 5) selects only Graph_4.
graph_paths = [f"C:/ms_bincode/Graph_{i}" for i in range(4, 5)]

# Create the generator; no file is read until the loop below pulls from it.
graphs_lazy = load_graphs_lazy(graph_paths)

# Nested dictionary: outer key "g<n>" per graph, inner key str(window) -> adjacency matrix
A_dict = {}

# Consume the generator one graph at a time; missing files arrive as None and are skipped.
# NOTE(review): `i` is the position within graph_paths, not the number in the
# file name, so with the paths above Graph_4 is stored under the key "g1".
for i, graph in enumerate(graphs_lazy):
    if graph is not None:
        # For each minute segment, generate an adjacency matrix and store it.
        # (window=60000 is presumably milliseconds -- confirm the graph's time unit.)
        A_dict[f"g{i+1}"] = {}
        for windowed_graph in graph.rolling(window=60000):
            minute_graph_segment = windowed_graph
            adjacency_matrix = transfor_into_adjacency_matrix(minute_graph_segment)
            # The inner key is the string representation of the windowed view.
            A_dict[f"g{i+1}"][str(windowed_graph)] = adjacency_matrix


# Pickle the nested dictionary (written to the current working directory)
with open("graphs_adjacency_matrices.pkl", "wb") as f:
    pickle.dump(A_dict, f)

print("Nested dictionary pickled successfully.")

# Conversion of a single graph bincode file into an adjacency matrix

In [1]:
import os
from raphtory import Graph

# Location of the single graph file to convert
binecode_path = "C:/ms_bincode/Graph_4"

# Report a missing file; otherwise load the graph into `g` and print its summary.
# When the file is missing, `g` is left unassigned by this cell.
if not os.path.isfile(binecode_path):
    print(f"File does not exist: {binecode_path}")
else:
    g = Graph.load_from_file(binecode_path)
    print(g)

Graph(number_of_edges=91201, number_of_vertices=25666, number_of_temporal_edges=145486068, earliest_time="10800000", latest_time="14399999")


In [2]:
from scipy.sparse import lil_matrix, csr_matrix
import numpy as np

def populate_adjacency_matrix(graph, adjacency_matrix):
    """Mark every edge of ``graph`` in ``adjacency_matrix`` and return it.

    Vertex positions follow the order of ``graph.vertices.name``. For each
    edge in ``graph.edges`` the entry ``[source, destination]`` is set to 1.
    The matrix passed in is modified in place and is also the return value.
    """
    # name -> row/column position
    index_of = {}
    for position, name in enumerate(list(graph.vertices.name)):
        index_of[name] = position

    for edge in graph.edges:
        row = index_of[edge.src.name]
        col = index_of[edge.dst.name]
        adjacency_matrix[row, col] = 1

    return adjacency_matrix

# Dictionary of per-window adjacency matrices, keyed by the segment number as a string ("1", "2", ...)
A_dict = {}
segment_counter = 1

# Instead of a dense matrix, initialise a sparse adjacency matrix using LIL format for efficient insertion.
# This empty matrix is sized by the vertex count of the full graph `g` and is only used as a template.
num_vertices = len(list(g.vertices.name))
adjacency_matrix = lil_matrix((num_vertices, num_vertices), dtype=int)

# For each minute segment, generate an adjacency matrix and store it as a CSR matrix for efficient arithmetic and matrix operations later
# (window=60000 is presumably milliseconds -- confirm the graph's time unit.)
for windowed_graph in g.rolling(window=60000):
    # Make sure to create a new sparse matrix for each window to avoid overwriting
    # NOTE(review): populate_adjacency_matrix builds its index from the windowed
    # graph's vertices while the matrix is sized from `g`; confirm both list the
    # same vertices in the same order, otherwise rows differ between windows.
    populated_adjacency_matrix = populate_adjacency_matrix(windowed_graph, adjacency_matrix.copy())
    # Convert to CSR format after populating for efficient storage and future operations
    A_dict[str(segment_counter)] = populated_adjacency_matrix.tocsr()
    segment_counter += 1  # Increment the segment counter for the next segment

In [4]:
# Print the whole dictionary; in the recorded output each value shows as its sparse-matrix summary.
print(A_dict)

{'1': <25666x25666 sparse matrix of type '<class 'numpy.int32'>'
	with 23296 stored elements in Compressed Sparse Row format>, '2': <25666x25666 sparse matrix of type '<class 'numpy.int32'>'
	with 22678 stored elements in Compressed Sparse Row format>, '3': <25666x25666 sparse matrix of type '<class 'numpy.int32'>'
	with 22088 stored elements in Compressed Sparse Row format>, '4': <25666x25666 sparse matrix of type '<class 'numpy.int32'>'
	with 22255 stored elements in Compressed Sparse Row format>, '5': <25666x25666 sparse matrix of type '<class 'numpy.int32'>'
	with 22492 stored elements in Compressed Sparse Row format>, '6': <25666x25666 sparse matrix of type '<class 'numpy.int32'>'
	with 22506 stored elements in Compressed Sparse Row format>, '7': <25666x25666 sparse matrix of type '<class 'numpy.int32'>'
	with 22171 stored elements in Compressed Sparse Row format>, '8': <25666x25666 sparse matrix of type '<class 'numpy.int32'>'
	with 21981 stored elements in Compressed Sparse Row 

In [5]:
from scipy.sparse.csgraph import laplacian
from scipy.sparse.linalg import eigsh  # For computing eigenvalues of sparse matrices
import numpy as np
from concurrent.futures import ProcessPoolExecutor
from scipy.sparse import csr_matrix

def eigenspectrum_sparse(L, k=6):
    """Return the ``k`` smallest-magnitude eigenvalues of a symmetric matrix.

    Parameters
    ----------
    L : dense array or scipy sparse matrix
        Real symmetric (or Hermitian) square matrix, e.g. a graph Laplacian.
    k : int, default 6
        Number of eigenvalues to compute; ``eigsh`` requires ``k`` to be
        smaller than the matrix dimension.

    Returns
    -------
    numpy.ndarray
        One-dimensional array of ``k`` eigenvalues in ascending order.
    """
    # ARPACK only accepts floating-point or complex data; an integer matrix
    # (e.g. an unnormalised Laplacian of an int adjacency matrix) is cast first.
    if hasattr(L, "astype") and not np.issubdtype(L.dtype, np.inexact):
        L = L.astype(float)

    # which='SM' asks for the smallest-magnitude eigenvalues in regular mode;
    # no `sigma` is given, so shift-invert is NOT used here.
    # With return_eigenvectors=False, eigsh returns only the eigenvalue array,
    # so it must not be unpacked as a (values, vectors) pair.
    eigvals = eigsh(L, k=k, which='SM', return_eigenvectors=False)

    # Sort explicitly so spectra from different matrices line up element-wise.
    return np.sort(eigvals)

def compute_eigenspectrum(args):
    """Worker: Laplacian eigenvalues for one adjacency matrix.

    Parameters
    ----------
    args : tuple
        Either ``(A, norm)`` or ``(A, norm, k)``. ``A`` is the adjacency
        matrix, ``norm`` selects the normalised Laplacian, and the optional
        ``k`` is the number of eigenvalues to compute. When ``k`` is omitted
        the default of ``eigenspectrum_sparse`` is used.

    Returns
    -------
    The value returned by ``eigenspectrum_sparse`` for the Laplacian of ``A``.
    """
    # A single tuple argument keeps the function usable with executor.map.
    A, norm, *rest = args
    L = laplacian(A, normed=norm, return_diag=False)
    if rest:
        return eigenspectrum_sparse(L, k=rest[0])
    return eigenspectrum_sparse(L)

def all_spectrums_sparse_parallel(A_dict, norm=True, num_processes=None):
    """Compute a small Laplacian eigenspectrum for every matrix in ``A_dict``.

    Parameters
    ----------
    A_dict : dict
        Maps snapshot keys to square adjacency matrices. All matrices are
        assumed to have the same size as the first one.
    norm : bool, default True
        Passed to ``laplacian`` as ``normed``.
    num_processes : int or None
        ``max_workers`` for the process pool (None lets the pool decide).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(A_dict), k)`` with one row per matrix, in the
        iteration order of ``A_dict``, where ``k = min(6, num_nodes - 1)``.

    Raises
    ------
    ValueError
        If ``A_dict`` is empty.

    NOTE(review): process pools need the worker function to be importable in
    the child process; when this is defined directly in a notebook that may
    fail on platforms that spawn workers -- confirm on the target machine.
    """
    if not A_dict:
        raise ValueError("A_dict is empty: no adjacency matrices to process")

    matrices = list(A_dict.values())
    num_graphs = len(matrices)
    num_nodes = matrices[0].shape[0]
    # Assuming we are interested in a smaller subset of eigenvalues for such large matrices
    k = min(6, num_nodes - 1)  # Adjust based on your specific needs
    eigenspectrums_transposed = np.zeros((num_graphs, k))

    # Pass k to every worker so each result has exactly k entries and fits a
    # row of the output array, also for graphs with fewer than 7 nodes.
    tasks = [(A, norm, k) for A in matrices]
    with ProcessPoolExecutor(max_workers=num_processes) as executor:
        results = list(executor.map(compute_eigenspectrum, tasks))

    for i, result in enumerate(results):
        eigenspectrums_transposed[i, :] = result

    return eigenspectrums_transposed



In [6]:
# Call the function under the name it is defined with in this notebook
# (`all_spectrums_parallel` does not exist and raised NameError).
eigenspectrums = all_spectrums_sparse_parallel(A_dict, norm=True)

In [None]:
# Output the result: the array of eigenvalues assigned to `eigenspectrums`
print(eigenspectrums)

In [None]:
from pystates import all_spectrums, snapshot_dist
from scipy.spatial import distance
import pickle
from sklearn.manifold import MDS
from sklearn.cluster import KMeans, AgglomerativeClustering
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
import random
from random import randint

# Pairwise Euclidean distances between the per-snapshot eigenvalue rows
dist2 = distance.cdist(eigenspectrums, eigenspectrums, 'euclidean')

# Embed the snapshots in 3 dimensions from the precomputed distance matrix.
# The matrix computed above is `dist2`; fitting on the undefined name `dist1`
# raised NameError.
mds = MDS(n_components=3, dissimilarity='precomputed', random_state=0)
results = mds.fit(dist2)
coords = results.embedding_

# Cluster the embedded snapshots
agg = AgglomerativeClustering().fit(coords)
labels = agg.labels_

# One random hex colour per cluster label, then one colour per snapshot.
# The colours are not seeded, so they change between runs.
colours = ['#%06X' % randint(0, 0xFFFFFF) for _ in range(np.max(labels) + 1)]
cmap = [colours[label] for label in labels]

# First two embedding coordinates, coloured by cluster
plt.figure()
plt.scatter(coords[:, 0], coords[:, 1], c=cmap)

# Cluster label of each snapshot in order
plt.figure()
plt.plot(labels, 'k.')

In [None]:
import numpy as np
from scipy.sparse.csgraph import laplacian

def eigenspectrum(L):
    """Return all eigenvalues of the symmetric matrix ``L`` in ascending order.

    Uses ``numpy.linalg.eigvalsh``, so ``L`` is treated as a real symmetric
    or complex Hermitian matrix. The result is a one-dimensional array.
    """
    spectrum = np.linalg.eigvalsh(L)
    # Sort the freshly computed array in place before handing it back.
    spectrum.sort()
    return spectrum

    
def all_spectrums(A_dict, eigenspectrums_transposed, norm=True):
    """Fill ``eigenspectrums_transposed`` with one Laplacian spectrum per row.

    The matrices in ``A_dict`` are visited in dictionary order. For the
    matrix at position ``r`` its Laplacian is built with ``normed=norm`` and
    the eigenvalues returned by ``eigenspectrum`` are written into row ``r``
    of the array supplied by the caller. That same array is returned.
    """
    for row, A in enumerate(A_dict.values()):
        eigenspectrums_transposed[row, :] = eigenspectrum(laplacian(A, normed=norm))

    # Rows are already in the desired orientation, so nothing is transposed.
    return eigenspectrums_transposed

In [None]:
# Output buffer for all_spectrums: one row per matrix in A_dict, one column
# per eigenvalue (a matrix with N nodes has N eigenvalues). It must be a float
# array; an integer buffer would truncate the eigenvalues on assignment.
num_nodes = next(iter(A_dict.values())).shape[0]
eigenspectrums_buffer = np.zeros((len(A_dict), num_nodes), dtype=float)

# Positional arguments must come before keyword arguments; the original call
# `all_spectrums(A_dict, norm=True, adjacency_matrix)` was a SyntaxError.
eigenspectrums = all_spectrums(A_dict, eigenspectrums_buffer, norm=True)

# Early logic

In [1]:
from raphtory import Graph
import numpy as np
import pickle

# TODO: incorporate lazy loading
# NOTE: this cell reads `g` but never loads it; run on a fresh kernel it fails
# with "NameError: name 'g' is not defined", as the recorded output shows.

# Identify all unique nodes
#print(list(g.vertices.name))
# [ms3123,ms323,...]

# Create a node index map: vertex name -> row/column position
node_index = {node_name: i for i, node_name in enumerate(list(g.vertices.name))}
#print(node_index)
# [ms3123:0, ms323:1,...]

# Initialise a dense square integer adjacency matrix, one row and column per vertex
adjacency_matrix = np.zeros((len(list(g.vertices.name)), len(list(g.vertices.name))), dtype=int)

# Populate adjacency matrix: set [source, destination] to 1 for every edge
for edge in g.edges:
    src_index = node_index[edge.src.name]
    dst_index = node_index[edge.dst.name]

    adjacency_matrix[src_index, dst_index] = 1

#print(adjacency_matrix)
#[[0 1 0 ... 0 0 0]
# [0 0 0 ... 0 0 0]
# [0 0 1 ... 0 0 0]
# ...
# [0 0 0 ... 0 0 0]
# [0 0 0 ... 0 0 0]
# [0 0 0 ... 0 0 0]]

# Single-entry dictionary holding the matrix for the whole graph under key '1'
A_dict = {'1': adjacency_matrix}
print(A_dict)

# TODO: turn it into a nested dictionary

# Such as:
# A_dict = {
#  "g1" : {
#       "1" : adjacency_matrix,
#       "2" : adjacency_matrix,
#           .
#           .
#           .
#   },
#   "g2" : {
#       "1" : adjacency_matrix,
#       "2" : adjacency_matrix,
#           .
#           .
#           .
#   },
#}

# Pickle the dictionary variable.
# NOTE: this is the same file name the multi-graph conversion cell writes to,
# so whichever cell runs last overwrites the other's output.
with open("graphs_adjacency_matrices.pkl", "wb") as f:
    pickle.dump(A_dict, f)

NameError: name 'g' is not defined