In [6]:
import networkx as nx
import gzip
import pandas as pd
from scipy import sparse
import numpy as np
import csv

In [7]:
# Input CSV locations. Both are relative paths, so they resolve against the
# kernel's current working directory when opened.
edges_path = "large_twitch_edges.csv"
features_path = "large_twitch_features.csv"

In [8]:
# Read the Twitch edge list; each CSV row holds the two endpoint ids of one edge.
edges_df = pd.read_csv(edges_path)
print(f"Loaded {len(edges_df)} edges")
print("Edges columns:", list(edges_df.columns))
print("First few edges:", edges_df.head(), sep="\n")

# Build the graph from the two id columns. from_pandas_edgelist creates an
# undirected nx.Graph unless create_using is given.
G = nx.from_pandas_edgelist(
    edges_df,
    source='numeric_id_1',
    target='numeric_id_2',
)
print(f"\nCreated graph with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges")

# Sparse adjacency matrix. No nodelist is passed, so rows and columns follow
# the iteration order of G.nodes() rather than sorted node ids.
adjacency_matrix = nx.adjacency_matrix(G)
print(f"Adjacency matrix shape: {adjacency_matrix.shape}")
print(f"Adjacency matrix density: {adjacency_matrix.nnz / (adjacency_matrix.shape[0] * adjacency_matrix.shape[1]):.6f}")

Loaded 6797557 edges
Edges columns: ['numeric_id_1', 'numeric_id_2']
First few edges:
   numeric_id_1  numeric_id_2
0         98343        141493
1         98343         58736
2         98343        140703
3         98343        151401
4         98343        157118

Created graph with 168114 nodes and 6797557 edges

Created graph with 168114 nodes and 6797557 edges
Adjacency matrix shape: (168114, 168114)
Adjacency matrix density: 0.000481
Adjacency matrix shape: (168114, 168114)
Adjacency matrix density: 0.000481


In [9]:
# Assemble the arrays used downstream. X and y are both read in the iteration
# order of G.nodes(), so position i refers to the same node in each.

# X: node ids exactly as stored in the graph
X = np.array(list(G.nodes()))
print(f"X (node indices) shape: {X.shape}")
print(f"Node indices range: {X.min()} to {X.max()}")

# y: degree of every node, collected in a single pass over the degree view
# (G.degree() yields (node, degree) pairs in G.nodes() order, matching X)
y = np.array([deg for _, deg in G.degree()])
print(f"y (node degrees) shape: {y.shape}")
print(f"Degree statistics - min: {y.min()}, max: {y.max()}, mean: {y.mean():.2f}")

# adj_matrix: the adjacency matrix computed earlier, guaranteed to be in
# CSR sparse format
adj_matrix = adjacency_matrix.tocsr()
print(f"adj_matrix shape: {adj_matrix.shape}")
print(f"adj_matrix format: {adj_matrix.format}")
print(f"adj_matrix density: {adj_matrix.nnz / (adj_matrix.shape[0] * adj_matrix.shape[1]):.6f}")

X (node indices) shape: (168114,)
Node indices range: 0 to 168113
y (node degrees) shape: (168114,)
Degree statistics - min: 1, max: 35279, mean: 80.87
adj_matrix shape: (168114, 168114)
adj_matrix format: csr
adj_matrix density: 0.000481
y (node degrees) shape: (168114,)
Degree statistics - min: 1, max: 35279, mean: 80.87
adj_matrix shape: (168114, 168114)
adj_matrix format: csr
adj_matrix density: 0.000481
