# Imports

In [None]:
from ogb.nodeproppred import PygNodePropPredDataset
from torch_geometric.utils import homophily
from torch_geometric.datasets import Planetoid, Reddit2, FacebookPagePage
import networkx as nx
from torch_geometric.utils import to_networkx

## Calculate clustering coefficient

In [None]:
def clustering_coef(data):
    """Return the average clustering coefficient of ``data``.

    ``data`` is converted to a NetworkX graph with ``to_networkx`` (default
    arguments) and the result is handed to ``nx.average_clustering``.

    NOTE(review): unlike ``calc_sparsity``, no ``to_undirected=True`` is
    passed to ``to_networkx`` here -- confirm that this is intended for
    datasets whose edges are directed.
    """
    # Reference for the metric:
    # https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.cluster.average_clustering.html
    return nx.average_clustering(to_networkx(data))

## Calculate homophily measures

In [None]:
def calc_homophily(data):
    """Print three homophily measures of ``data``.

    For each of the methods ``'edge'``, ``'node'`` and ``'edge_insensitive'``
    the function calls ``homophily`` on ``data.edge_index`` and ``data.y``
    and prints the result behind a descriptive label. Nothing is returned.
    """
    # (printed label, value of the ``method`` argument), in output order.
    labelled_methods = (
        ('Edge Homophily:', 'edge'),
        ('Node Homophily:', 'node'),
        ('Edge Insensitive Homophily:', 'edge_insensitive'),
    )
    for label, method in labelled_methods:
        print(label, homophily(data.edge_index, data.y, method=method))

## Calculate sparsity

In [None]:
def calc_sparsity(cur_data):
    """Return the sparsity of ``cur_data``, defined as ``1 - density``.

    ``cur_data`` is converted to a NetworkX graph with
    ``to_networkx(..., to_undirected=True)`` and its density is taken from
    ``nx.density``.
    """
    # Reference for the density:
    # https://networkx.org/documentation/stable/reference/generated/networkx.classes.function.density.html
    return 1 - nx.density(to_networkx(cur_data, to_undirected=True))

# General method for Dataset Calculations

In [None]:
def calculate_stats(data):
    """Print a summary of ``data``.

    Printed, in order: the object itself, the result of
    ``data.is_undirected()``, the homophily measures (via
    ``calc_homophily``) and the average clustering coefficient (via
    ``clustering_coef``). Nothing is returned.
    """
    print(data)

    is_undirected = data.is_undirected()
    print('Undirected:', is_undirected)

    calc_homophily(data)

    avg_clustering = clustering_coef(data)
    print('Average Cluster Coefficient:', avg_clustering)

# Dataset calculations:
## ogbn-arxiv:

In [None]:
# Print a label, then report statistics for the first element of the
# ogbn-arxiv dataset. No `root` argument is passed here, unlike the other
# dataset cells in this file.
print("ogbn-arxiv")
calculate_stats(PygNodePropPredDataset(name="ogbn-arxiv")[0])

## Cora:

In [None]:
# Print a label, then report statistics for the first element of the
# Planetoid dataset named 'cora', stored under 'dataset/Planetoid'.
# NOTE(review): the name is lowercase here while the other Planetoid cells
# use capitalized names ('CiteSeer', 'Pubmed') -- confirm this is intended.
print("Cora")
calculate_stats(Planetoid(root='dataset/Planetoid', name='cora')[0])

## CiteSeer:

In [None]:
# Print a label, then report statistics for the first element of the
# Planetoid dataset named 'CiteSeer', stored under 'dataset/Planetoid'.
print("CiteSeer")
calculate_stats(Planetoid(root='dataset/Planetoid', name='CiteSeer')[0])

## Pubmed:

In [None]:
# Print a label, then report statistics for the first element of the
# Planetoid dataset named 'Pubmed', stored under 'dataset/Planetoid'.
print("Pubmed")
calculate_stats(Planetoid(root='dataset/Planetoid', name='Pubmed')[0])

## Reddit2:

In [None]:
# Print a label, then report statistics for the first element of the
# Reddit2 dataset, stored under 'dataset/Reddit2'.
print("Reddit2")
calculate_stats(Reddit2(root='dataset/Reddit2')[0])

## FacebookPagePage

In [None]:
# Print a label, then report statistics for the first element of the
# FacebookPagePage dataset, stored under 'dataset/FacebookPagePage'.
print("Facebook")
calculate_stats(FacebookPagePage(root='dataset/FacebookPagePage')[0])