In [1]:
# Reload imported modules automatically before each cell runs, so edits to
# library source files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
from IPython.display import display
import polars as pl
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from typing import List, Tuple
from pathlib import Path
import os
import torch
from torch import Tensor
from torch_geometric.utils import to_networkx
from torch_geometric.datasets import Planetoid
from torch_geometric.utils import coalesce
# Pick the compute device once: CUDA when a usable GPU is present, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Relative directory for local data; created if it does not exist yet.
data_dir = "./data"
os.makedirs(data_dir, exist_ok=True)

In [None]:
#l2g related imports
from l2gv2.patch.clustering import louvain_clustering
from l2gv2.graphs.tgraph import TGraph
from l2gv2.graphs.npgraph import NPGraph
from l2gv2.patch.patches import create_patch_data


# <font color="grey"> Hierarchical construction of Patch Graphs</font>

###  <a id='chapter1'> <font color="grey">1. The local2global approach </font></a>

The initial step in creating the patch graph consists of clustering the graph. The resulting clusters are the starting point from which the patches are built.

The Cora dataset is a well-known benchmark in the field of graph research. It consists of 2708 scientific publications, each classified into one of seven classes, and its citation network contains 5429 links. Each publication in the dataset is described by a 0/1-valued word vector indicating the absence/presence of the corresponding word from the dictionary. The dictionary consists of 1433 unique words.

In [3]:
from l2gv2.datasets import get_dataset

In [41]:
# Load the "as-733" dataset via l2gv2's `get_dataset` helper.
# NOTE(review): the prose above and the recorded output further down describe
# Cora (2708 nodes, 1433 features) — confirm which dataset this notebook is
# meant to load.
dataset = get_dataset("as-733")

In [42]:
# NOTE(review): the recorded output of this cell is an AttributeError
# ('AS733Dataset' object has no attribute 'x'). Node features presumably live
# on the individual graphs the dataset yields — confirm, then fix or drop this
# cell so the notebook can run top to bottom.
dataset.x

AttributeError: 'AS733Dataset' object has no attribute 'x'

In [6]:
from torch_geometric.data import InMemoryDataset, Dataset

In [39]:
# Print the summary representation of every item the dataset yields.
for d in dataset:
    print(d)

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])


In [None]:
# Degree statistics and degree histogram of the graph.
# NOTE(review): `data` is not assigned in any visible cell of this notebook; it
# is presumably a torch_geometric `Data` graph (e.g. an element of `dataset`).
# Define it explicitly before this cell so the notebook survives
# Restart & Run All.
G = to_networkx(data, to_undirected=True)
degrees = [degree for _, degree in G.degree()]

# Summary statistics of the degree sequence, shown as a single wide row.
display(pd.DataFrame(pd.Series(degrees).describe()).transpose().round(2))

# Label the raw counts so the printed numbers are self-explanatory.
print("number of nodes:", len(degrees))
print("sum of degrees:", sum(degrees))

# A figure should stand alone when skimmed: label both axes and add a title.
plt.figure(figsize=(10, 6))
plt.hist(degrees, bins=50)
plt.xlabel("node degree")
plt.ylabel("number of nodes")
plt.title("Node degree distribution")
plt.show()

In [None]:
# Draw the graph with node sizes proportional to degree centrality; nodes at or
# above the centrality threshold are drawn opaque, all others faded.
G = to_networkx(data, to_undirected=True)
pos = nx.spring_layout(G, seed=42)  # fixed seed keeps the layout reproducible

cent = nx.degree_centrality(G)
cent_array = np.array(list(cent.values()))
node_size = [value * 500 for value in cent.values()]

# Centrality found at index 10 of the descending ranking.
threshold = np.sort(cent_array)[::-1][10]
print("threshold", threshold)

# 1 for nodes at/above the threshold, 0.1 otherwise; reused below both as the
# colour value and as the per-node alpha.
cent_bin = np.where(cent_array >= threshold, 1, 0.1)

plt.figure(figsize=(12, 12))
nodes = nx.draw_networkx_nodes(
    G,
    pos,
    nodelist=list(cent),
    node_size=node_size,
    node_color=cent_bin,
    cmap=plt.cm.plasma,
    alpha=cent_bin,
)
edges = nx.draw_networkx_edges(G, pos, width=0.25, alpha=0.3)
plt.show()

In [None]:
# Map each integer class id (0..6) to a readable topic name; the ids follow the
# order of the list below.
label_dict = dict(enumerate([
    "Theory",
    "Reinforcement_Learning",
    "Genetic_Algorithms",
    "Neural_Networks",
    "Probabilistic_Methods",
    "Case_Based",
    "Rule_Learning",
]))
# Peek at the first ten entries of the label tensor.
data.y[:10]

In [2]:
# Relative location of the Parquet edge file used by the cells below.
aspath = Path("..") / "data" / "snap-as" / "as_edges.parquet"

In [None]:
# Read the Parquet file into a polars DataFrame and preview its first five rows.
df = pl.read_parquet(aspath)
df.head(n=5)

In [None]:
# Parameters: 
# 10 patches
# Average degree k=4
# Overlap between 256 and 1024
# Embedding dimension up to 128

In [None]:
def hierarchical_cluster_and_embed(graph: nx.Graph, m: int, k: int) -> List[Tuple[nx.Graph, List[float]]]:
    """Recursively split ``graph`` into clusters and embed the leaves.

    A graph with at most ``m`` nodes is embedded directly. A larger graph is
    split with ``cluster(graph, k)`` and every resulting subgraph is processed
    the same way; the leaf results are concatenated in cluster order.

    Args:
        graph: Graph to process. Only ``number_of_nodes()`` is used here; the
            rest is up to ``cluster`` and ``embed``.
        m: Largest node count that is embedded without further splitting.
        k: Passed through unchanged as the second argument of ``cluster``.

    Returns:
        A flat list of ``(subgraph, embed(subgraph))`` pairs, one per leaf.

    Note:
        ``cluster`` and ``embed`` are not defined in this block and must be in
        scope when the function is called.
    """
    n_nodes = graph.number_of_nodes()
    if n_nodes <= m:
        return [(graph, embed(graph))]

    # Materialise the clusters so they can be inspected and then iterated.
    clusters = list(cluster(graph, k))

    # Termination guard: if some cluster is as large as its parent, recursing
    # would not shrink the problem and could repeat forever. Treat the graph
    # as a leaf instead.
    if any(sub.number_of_nodes() >= n_nodes for sub in clusters):
        return [(graph, embed(graph))]

    results = []
    for subgraph in clusters:
        results.extend(hierarchical_cluster_and_embed(subgraph, m, k))
    return results

In [None]:
# Edge list as a NumPy array. `.cpu()` returns the tensor unchanged when it is
# already in host memory and keeps the conversion working if `data` was moved
# to the GPU (`Tensor.numpy()` cannot convert CUDA tensors).
# NOTE(review): `data` is not assigned in any visible cell of this notebook —
# presumably a torch_geometric `Data` object; define it explicitly.
edge_index = data.edge_index.cpu().numpy()
print(edge_index.shape)