# Imports -- SEE ALSO DatasetCalculations.py

In [None]:
from ogb.nodeproppred import PygNodePropPredDataset
from torch_geometric.utils import homophily, mask_to_index
from torch_geometric.datasets import Planetoid, Reddit2, FacebookPagePage
import networkx as nx
from torch_geometric.utils import to_networkx
from torch_geometric.transforms import RandomNodeSplit

## Calculate clustering coefficient

In [None]:
def clustering_coef(cur_data, *, compute=False):
    """Return the average clustering coefficient of a graph.

    The computation is opt-in. This function previously started with an
    unconditional ``return 0``, which left the actual computation unreachable.
    That placeholder result is kept as the default so existing callers behave
    exactly as before (presumably the stub was added because the computation
    is too slow on the larger datasets -- TODO confirm).

    Args:
        cur_data: Graph data object accepted by ``to_networkx``. It is not
            touched when ``compute`` is ``False``.
        compute: When ``False`` (default), skip the computation and return
            the placeholder ``0``. When ``True``, convert ``cur_data`` to a
            NetworkX graph and compute the average clustering coefficient.

    Returns:
        ``0`` when ``compute`` is ``False``; otherwise the value returned by
        ``nx.average_clustering``.
    """
    if not compute:
        # Placeholder result: the coefficient was NOT computed.
        return 0

    # Convert data to a NetworkX graph
    G = to_networkx(cur_data)

    # Compute the average cluster coefficient https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.cluster.average_clustering.html
    return nx.average_clustering(G)

## Calculate homophily measures

In [None]:
def calc_homophily(cur_data):
    """Print the edge, node and edge-insensitive homophily of ``cur_data``.

    Each value is obtained by calling ``homophily`` with
    ``cur_data.edge_index``, ``cur_data.y`` and the respective ``method``.
    The results are only printed; nothing is returned.
    """
    measures = (
        ('Edge Homophily:', 'edge'),
        ('Node Homophily:', 'node'),
        ('Edge Insensitive Homophily:', 'edge_insensitive'),
    )
    for label, method in measures:
        print(label, homophily(cur_data.edge_index, cur_data.y, method=method))

## Calculate sparsity

In [None]:
def calc_sparsity(cur_data):
    """Return the sparsity of a graph, defined here as ``1 - density``.

    ``cur_data`` is converted to an undirected NetworkX graph before the
    density is measured.
    """
    undirected_graph = to_networkx(cur_data, to_undirected=True)
    # Density as defined by NetworkX:
    # https://networkx.org/documentation/stable/reference/generated/networkx.classes.function.density.html
    return 1 - nx.density(undirected_graph)

# Dataset calculations:
## ogbn-arxiv:

In [None]:
# Load ogbn-arxiv (downloaded and processed at './dataset/ogbn_arxiv/')
dataset = PygNodePropPredDataset(name="ogbn-arxiv")
data = dataset[0]

# Use the train/valid/test split returned by the dataset itself.
split_idx = dataset.get_idx_split()
idx_train, idx_val, idx_test = (
    split_idx[part] for part in ("train", "valid", "test")
)

# Report the graph summary, the split sizes and the structural statistics.
print(data)
print("Split: (train / val / test)")
print("\t", len(idx_train), '/', len(idx_val), '/', len(idx_test))
print('Undirected:', data.is_undirected())
calc_homophily(data)
print('Average Cluster Coefficient:', clustering_coef(data))
print('Sparsity:', calc_sparsity(data))

## Citeseer:

In [None]:
# Load Citeseer (downloaded and processed at './dataset/Planetoid/Citeseer/')
dataset = Planetoid(root='dataset/Planetoid', name='Citeseer')
data = dataset[0]

# Convert the train/val/test masks stored on the data object into indices.
idx_train, idx_val, idx_test = (
    mask_to_index(mask)
    for mask in (data.train_mask, data.val_mask, data.test_mask)
)

# Report the graph summary, the split sizes and the structural statistics.
print(data)
print("Split: (train / val / test)")
print("\t", len(idx_train), '/', len(idx_val), '/', len(idx_test))
print('Undirected:', data.is_undirected())
calc_homophily(data)
print('Average Cluster Coefficient:', clustering_coef(data))
print('Sparsity:', calc_sparsity(data))

## Pubmed:

In [None]:
# Load PubMed (downloaded and processed at './dataset/Planetoid/PubMed/')
dataset = Planetoid(root='dataset/Planetoid', name='PubMed')
data = dataset[0]

# Convert the train/val/test masks stored on the data object into indices.
idx_train, idx_val, idx_test = (
    mask_to_index(mask)
    for mask in (data.train_mask, data.val_mask, data.test_mask)
)

# Report the graph summary, the split sizes and the structural statistics.
print(data)
print("Split: (train / val / test)")
print("\t", len(idx_train), '/', len(idx_val), '/', len(idx_test))
print('Undirected:', data.is_undirected())
calc_homophily(data)
print('Average Cluster Coefficient:', clustering_coef(data))
print('Sparsity:', calc_sparsity(data))

## Reddit2:

In [None]:
# Load Reddit2 (downloaded and processed at './dataset/Reddit2/')
dataset = Reddit2(root='dataset/Reddit2')
data = dataset[0]

# Convert the train/val/test masks stored on the data object into indices.
idx_train, idx_val, idx_test = (
    mask_to_index(mask)
    for mask in (data.train_mask, data.val_mask, data.test_mask)
)

# Report the graph summary, the split sizes and the structural statistics.
print(data)
print("Split: (train / val / test)")
print("\t", len(idx_train), '/', len(idx_val), '/', len(idx_test))
print('Undirected:', data.is_undirected())
calc_homophily(data)
print('Average Cluster Coefficient:', clustering_coef(data))
print('Sparsity:', calc_sparsity(data))

## Facebook:

In [None]:
# Download and process data at './dataset/FacebookPagePage/'
dataset = FacebookPagePage(root='dataset/FacebookPagePage')

data = dataset[0]
# The split masks read below are produced by RandomNodeSplit.
# Keep the transform's return value: recent PyTorch Geometric versions apply
# transforms to a shallow copy of the input, so calling `transform(data)` and
# discarding the result leaves `data` without train/val/test masks. Rebinding
# is also correct for older versions that modified the object in place.
# NOTE: the split is random and no seed is set here, so the masks (though not
# necessarily their sizes) can differ between runs.
transform = RandomNodeSplit(split='test_rest')
data = transform(data)
idx_train = mask_to_index(data.train_mask)
idx_val = mask_to_index(data.val_mask)
idx_test = mask_to_index(data.test_mask)

# Report the graph summary, the split sizes and the structural statistics.
print(data)
print("Split: (train / val / test)")
print("\t", len(idx_train), '/', len(idx_val), '/', len(idx_test))
print('Undirected:', data.is_undirected())
calc_homophily(data)
print('Average Cluster Coefficient:', clustering_coef(data))
print('Sparsity:', calc_sparsity(data))

## Cora:

In [None]:
# Load Cora through the Planetoid loader (root directory 'dataset/Planetoid')
dataset = Planetoid(root='dataset/Planetoid', name='cora')
data = dataset[0]

# Convert the train/val/test masks stored on the data object into indices.
idx_train, idx_val, idx_test = (
    mask_to_index(mask)
    for mask in (data.train_mask, data.val_mask, data.test_mask)
)

# Report the graph summary, the split sizes and the structural statistics.
print(data)
print("Split: (train / val / test)")
print("\t", len(idx_train), '/', len(idx_val), '/', len(idx_test))
print('Undirected:', data.is_undirected())
calc_homophily(data)
print('Average Cluster Coefficient:', clustering_coef(data))
print('Sparsity:', calc_sparsity(data))