# Imports

In [1]:
from ogb.nodeproppred import PygNodePropPredDataset
from torch_geometric.utils import homophily
from torch_geometric.datasets import Planetoid, Reddit, FacebookPagePage
import networkx as nx
from torch_geometric.utils import to_networkx

# Calculate Cluster coefficient

In [2]:
def clustering_coef(data):
    # Convert data to a NetworkX graph
    G = to_networkx(data)

    # Compute the average cluster coefficient https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.cluster.average_clustering.html
    average_cc = nx.average_clustering(G)
    return average_cc

# Dataset calculations:
## ogbn-arxiv:

In [3]:
# Download and process data at './dataset/ogbn_arxiv/'
dataset = PygNodePropPredDataset(name="ogbn-arxiv")

data = dataset[0]

print(data)
print('Undirected:', data.is_undirected())
print('Edge Homophily:', homophily(data.edge_index, data.y, method='edge'))
print('Node Homophily:', homophily(data.edge_index, data.y, method='node'))
print('Edge Insensitive Homophily:', homophily(data.edge_index, data.y, method='edge_insensitive'))
print('Average Cluster Coefficient:', clustering_coef(data))

Data(num_nodes=169343, edge_index=[2, 1166243], x=[169343, 128], node_year=[169343, 1], y=[169343, 1])
Undirected: False
Edge Homophily: 0.6550830602645874
Node Homophily: 0.4279983341693878
Edge Insensitive Homophily: 0.44445815682411194
Average Cluster Coefficient: 0.11829836531589452


## ogbn-products:

In [4]:
# Download and process data at './dataset/ogbn_producs/'
dataset = PygNodePropPredDataset(name="ogbn-products")

# Split:
# split_idx = dataset.get_idx_split()
# train_idx, valid_idx, test_idx = split_idx["train"], split_idx["valid"], split_idx["test"]

data = dataset[0]

print(data)
print('Undirected:', data.is_undirected())
print('Edge Homophily:', homophily(data.edge_index, data.y, method='edge'))
print('Node Homophily:', homophily(data.edge_index, data.y, method='node'))
print('Edge Insensitive Homophily:', homophily(data.edge_index, data.y, method='edge_insensitive'))
print('Average Cluster Coefficient:', clustering_coef(data))

Data(num_nodes=2449029, edge_index=[2, 123718280], x=[2449029, 100], y=[2449029, 1])
Undirected: True
Edge Homophily: 0.807552695274353
Node Homophily: 0.8165683746337891
Edge Insensitive Homophily: 0.46079307794570923


MemoryError: 

## Citeseer:

In [None]:
# Download and process data at './dataset/Planetiod/Citeseer/'
dataset = Planetoid(root='dataset/Planetoid', name='Citeseer')

data = dataset[0]
#
# split = dataset.get(0)
# print(split.edge_attr)
#
# sparse_mx = to_scipy_sparse_matrix(split.edge_index).tocoo().astype(np.float32)
#
# print(sparse_mx) # equivalent to utils line 86
#
# features = split.x
# labels = split.y
# idx_train = mask_to_index(split.train_mask)
# idx_val = mask_to_index(split.val_mask)
# idx_test = mask_to_index(split.test_mask)
#
# sparse_mx, features = preprocess_citation(sparse_mx, features, "AugNormAdj")
#
# indices = torch.from_numpy(
#         np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
# values = torch.from_numpy(sparse_mx.data)
# shape = torch.Size(sparse_mx.shape)
#
# print(indices, values, shape)
#
# print('feat', features.shape)
#print('label', labels)
#print('idx_train', idx_train)
#print('idx_val', idx_val)
#print('idx_test', idx_test)


print(data)
print('Undirected:', data.is_undirected())
print('Edge Homophily:', homophily(data.edge_index, data.y, method='edge'))
print('Node Homophily:', homophily(data.edge_index, data.y, method='node'))
print('Edge Insensitive Homophily:', homophily(data.edge_index, data.y, method='edge_insensitive'))
print('Average Cluster Coefficient:', clustering_coef(data))

## Pubmed:

In [None]:
# Download and process data at './dataset/Planetiod/Pubmed/'
dataset = Planetoid(root='dataset/Planetoid', name='PubMed')

data = dataset[0]

print(data)
print('Undirected:', data.is_undirected())
print('Edge Homophily:', homophily(data.edge_index, data.y, method='edge'))
print('Node Homophily:', homophily(data.edge_index, data.y, method='node'))
print('Edge Insensitive Homophily:', homophily(data.edge_index, data.y, method='edge_insensitive'))
print('Average Cluster Coefficient:', clustering_coef(data))

## Reddit:

In [None]:
# Download and process data at './dataset/Reddit/'
dataset = Reddit(root='dataset/Reddit')

data = dataset[0]

split = dataset.get(0)
#adj = to_scipy_sparse_matrix(split.edge_index).tocoo().astype(np.float32)
print(split)

print(data)
print('Undirected:', data.is_undirected())
print('Edge Homophily:', homophily(data.edge_index, data.y, method='edge'))
print('Node Homophily:', homophily(data.edge_index, data.y, method='node'))
print('Edge Insensitive Homophily:', homophily(data.edge_index, data.y, method='edge_insensitive'))
print('Average Cluster Coefficient:', clustering_coef(data))

## Facebook:

In [None]:
# Download and process data at './dataset/FacebookPagePage/'
dataset = FacebookPagePage(root='dataset/FacebookPagePage')

data = dataset[0]

print('Undirected:', data.is_undirected())
print('Edge Homophily:', homophily(data.edge_index, data.y, method='edge'))
print('Node Homophily:', homophily(data.edge_index, data.y, method='node'))
print('Edge Insensitive Homophily:', homophily(data.edge_index, data.y, method='edge_insensitive'))
print('Average Cluster Coefficient:', clustering_coef(data))

# Cora

In [None]:
dataset = Planetoid(root='dataset/cora', name='cora')

print('Undirected:', data.is_undirected())
print('Edge Homophily:', homophily(data.edge_index, data.y, method='edge'))
print('Node Homophily:', homophily(data.edge_index, data.y, method='node'))
print('Edge Insensitive Homophily:', homophily(data.edge_index, data.y, method='edge_insensitive'))
print('Average Cluster Coefficient:', clustering_coef(data))

<hr />

# Using the homophily method from torch, trial:

In [None]:
dataset = PygNodePropPredDataset(name="ogbn-arxiv")

data = dataset[0]
print(data)
print(data.edge_index)
print(data.y)
print(homophily(data.edge_index, data.y, method='node'))