In [None]:
# import packages

from attrdict import AttrDict
from torch_geometric.datasets import TUDataset
from torch_geometric.data import Data
from torch_geometric.utils import to_networkx, from_networkx, to_dense_adj
import torch_geometric.transforms as T

import networkx as nx
import time
import tqdm
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle
import wget
import zipfile
import os

In [None]:
# Load the four TU benchmark collections (cached under ./data) and
# materialise each one as a plain Python list of graphs.

mutag, enzymes, proteins, imdb = (
    list(TUDataset(root="data", name=dataset_name))
    for dataset_name in ("MUTAG", "ENZYMES", "PROTEINS", "IMDB-BINARY")
)

In [None]:
# implement functions to compute graph attributes

class graph_attributes:
    """Collect per-graph structural statistics for a collection of graphs.

    Every ``compute_*`` method iterates over ``data`` (an iterable of
    PyTorch Geometric graph objects, e.g. a list built from ``TUDataset``)
    and appends one value per graph to the list attribute of the same name.
    Calling a method twice appends a second batch of values; create a new
    instance to start from scratch.

    All networkx-based statistics are computed on the *undirected* version
    of each graph, because connectivity, triangle and k-core routines in
    networkx are only defined for undirected graphs.
    """

    def __init__(self):
        # One result list per attribute, filled by the matching compute_* method.
        self.edge_density = []
        self.average_degree = []
        self.degree_assortativity = []
        self.pseudo_diameter = []
        self.relative_size_of_largest_component = []
        self.average_clustering_coefficient = []
        self.transitivity = []
        self.degeneracy = []
        self.gini_coeff_degree = []
        self.edge_homogeneity = []
        self.in_feature_similarity = []
        self.out_feature_similarity = []
        self.feature_angular_snr = []
        self.homophily_measure = []
        self.attribute_assortativity = []

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _to_undirected_graph(graph):
        """Return ``graph`` as an undirected ``networkx.Graph``.

        ``to_networkx`` yields a ``DiGraph`` by default; collapsing it with
        ``to_undirected()`` keeps an edge whenever either direction exists.
        """
        return to_networkx(graph).to_undirected()

    @staticmethod
    def _largest_component_nodes(G):
        """Return the node set of the largest connected component of ``G``."""
        return max(nx.connected_components(G), key=len)

    @staticmethod
    def _edge_density(num_edges, num_nodes):
        """Return ``num_edges / (num_nodes * (num_nodes - 1))``.

        ``num_edges`` is expected to count *directed* entries (PyG stores an
        undirected edge as two entries), so the denominator is the number of
        ordered node pairs. Graphs with fewer than two nodes get ``0.0``
        instead of a division by zero.
        """
        if num_nodes < 2:
            return 0.0
        return num_edges / (num_nodes * (num_nodes - 1))

    @staticmethod
    def _gini_coefficient(values):
        """Return the Gini coefficient of a sequence of non-negative numbers.

        Uses ``sum((2*i - n - 1) * x_i) / (n * sum(x))`` with ``x`` sorted in
        ASCENDING order and ``i`` running from 1 to ``n``. An empty sequence
        or an all-zero sequence yields ``0.0``.
        """
        ordered = sorted(values)
        count = len(ordered)
        total = sum(ordered)
        if count == 0 or total == 0:
            return 0.0
        weighted = sum(
            (2 * rank - count - 1) * value
            for rank, value in enumerate(ordered, start=1)
        )
        return weighted / (count * total)

    # ------------------------------------------------------------------
    # structural attributes
    # ------------------------------------------------------------------

    def compute_edge_density(self, data):
        """Append the edge density of every graph in ``data``."""
        for graph in tqdm.tqdm(data):
            self.edge_density.append(
                self._edge_density(graph.num_edges, graph.num_nodes)
            )

    def compute_average_degree(self, data):
        """Append ``num_edges / num_nodes`` for every graph in ``data``.

        With bidirectionally stored edges this equals the mean undirected
        node degree. Graphs without nodes get ``0.0``.
        """
        for graph in tqdm.tqdm(data):
            node_count = graph.num_nodes
            if node_count == 0:
                self.average_degree.append(0.0)
            else:
                self.average_degree.append(graph.num_edges / node_count)

    def compute_degree_assortativity(self, data):
        """Append the degree assortativity coefficient of every graph.

        Edgeless graphs have no defined coefficient and get ``nan``.
        networkx itself may also return ``nan`` (e.g. for regular graphs).
        """
        for graph in tqdm.tqdm(data):
            G = self._to_undirected_graph(graph)
            if G.number_of_edges() == 0:
                self.degree_assortativity.append(float("nan"))
                continue
            self.degree_assortativity.append(
                nx.degree_assortativity_coefficient(G)
            )

    def compute_pseudo_diameter(self, data):
        """Append the diameter of each graph's largest connected component.

        The value is the exact diameter computed by ``nx.diameter``; the
        attribute keeps its historical name. Graphs without nodes get ``0``.
        """
        for graph in tqdm.tqdm(data):
            G = self._to_undirected_graph(graph)
            if G.number_of_nodes() == 0:
                self.pseudo_diameter.append(0)
                continue
            # The diameter is infinite on a disconnected graph, so restrict
            # the computation to the largest connected component.
            if not nx.is_connected(G):
                G = G.subgraph(self._largest_component_nodes(G))
            self.pseudo_diameter.append(nx.diameter(G))

    def compute_relative_size_of_largest_component(self, data):
        """Append the fraction of nodes lying in the largest component."""
        for graph in tqdm.tqdm(data):
            G = self._to_undirected_graph(graph)
            node_count = G.number_of_nodes()
            if node_count == 0:
                self.relative_size_of_largest_component.append(0.0)
                continue
            largest = self._largest_component_nodes(G)
            self.relative_size_of_largest_component.append(
                len(largest) / node_count
            )

    def compute_average_clustering_coefficient(self, data):
        """Append the mean local clustering coefficient over all nodes."""
        for graph in tqdm.tqdm(data):
            G = self._to_undirected_graph(graph)
            if G.number_of_nodes() == 0:
                # nx.average_clustering divides by the node count.
                self.average_clustering_coefficient.append(0.0)
                continue
            self.average_clustering_coefficient.append(nx.average_clustering(G))

    def compute_transitivity(self, data):
        """Append the transitivity of every graph.

        Transitivity is ``3 * triangles / connected_triples`` as computed by
        ``nx.transitivity``, which returns ``0`` when there are no connected
        triples.
        """
        for graph in tqdm.tqdm(data):
            G = self._to_undirected_graph(graph)
            self.transitivity.append(nx.transitivity(G))

    def compute_degeneracy(self, data):
        """Append the degeneracy (largest k for which a k-core exists)."""
        for graph in tqdm.tqdm(data):
            G = self._to_undirected_graph(graph)
            # core_number is not defined for graphs with self-loops.
            G.remove_edges_from(list(nx.selfloop_edges(G)))
            core_numbers = nx.core_number(G)
            # Degeneracy is the maximum core number, not the per-node dict.
            self.degeneracy.append(max(core_numbers.values(), default=0))

    def compute_gini_coeff_degree(self, data):
        """Append the Gini coefficient of every graph's degree sequence."""
        for graph in tqdm.tqdm(data):
            G = self._to_undirected_graph(graph)
            degrees = [degree for _, degree in G.degree()]
            self.gini_coeff_degree.append(self._gini_coefficient(degrees))

    # ------------------------------------------------------------------
    # feature-based attributes (placeholders)
    # ------------------------------------------------------------------

    def compute_edge_homogeneity(self, data):
        """Not implemented yet; ``self.edge_homogeneity`` stays unchanged."""
        pass

    def compute_in_feature_similarity(self, data):
        """Not implemented yet; ``self.in_feature_similarity`` stays unchanged."""
        pass

    def compute_out_feature_similarity(self, data):
        """Not implemented yet; ``self.out_feature_similarity`` stays unchanged."""
        pass

    def compute_feature_angular_snr(self, data):
        """Not implemented yet; ``self.feature_angular_snr`` stays unchanged."""
        pass

    def compute_homophily_measure(self, data):
        """Not implemented yet; ``self.homophily_measure`` stays unchanged."""
        pass

    def compute_attribute_assortativity(self, data):
        """Append the assortativity of the node features of every graph.

        Each node's feature row ``graph.x[node]`` is treated as one
        categorical value (for one-hot features this is the node label) and
        stored under the ``"node_attr"`` node attribute before calling
        ``nx.attribute_assortativity_coefficient``. Graphs without node
        features or without edges get ``nan``.
        """
        for graph in tqdm.tqdm(data):
            G = self._to_undirected_graph(graph)
            features = getattr(graph, "x", None)
            if features is None or G.number_of_edges() == 0:
                self.attribute_assortativity.append(float("nan"))
                continue
            categories = {}
            for node in G.nodes:
                row = features[node].tolist()
                # Lists are unhashable; networkx needs hashable categories.
                categories[node] = tuple(row) if isinstance(row, list) else row
            nx.set_node_attributes(G, categories, "node_attr")
            self.attribute_assortativity.append(
                nx.attribute_assortativity_coefficient(G, "node_attr")
            )

    def compute_all_attributes(self, data):
        """Run every ``compute_*`` method on ``data`` in a fixed order."""
        self.compute_edge_density(data)
        self.compute_average_degree(data)
        self.compute_degree_assortativity(data)
        self.compute_pseudo_diameter(data)
        self.compute_relative_size_of_largest_component(data)
        self.compute_average_clustering_coefficient(data)
        self.compute_transitivity(data)
        self.compute_degeneracy(data)
        self.compute_gini_coeff_degree(data)
        self.compute_edge_homogeneity(data)
        self.compute_in_feature_similarity(data)
        self.compute_out_feature_similarity(data)
        self.compute_feature_angular_snr(data)
        self.compute_homophily_measure(data)
        self.compute_attribute_assortativity(data)

    # visualize the distribution of each attribute
    def visualize_attributes(self):
        """Show one histogram per attribute on a 4x4 grid of subplots.

        Non-finite entries (``nan``/``inf``) are dropped before plotting
        because ``plt.hist`` cannot derive bin edges from them. Attributes
        that have not been computed yet produce an empty panel.
        """
        panels = [
            ("edge density", self.edge_density),
            ("average degree", self.average_degree),
            ("degree assortativity", self.degree_assortativity),
            ("pseudo diameter", self.pseudo_diameter),
            ("relative size of largest component",
             self.relative_size_of_largest_component),
            ("average clustering coefficient",
             self.average_clustering_coefficient),
            ("transitivity", self.transitivity),
            ("degeneracy", self.degeneracy),
            ("gini coefficient of degree distribution", self.gini_coeff_degree),
            ("edge homogeneity", self.edge_homogeneity),
            ("in feature similarity", self.in_feature_similarity),
            ("out feature similarity", self.out_feature_similarity),
            ("feature angular snr", self.feature_angular_snr),
            ("homophily measure", self.homophily_measure),
            ("attribute assortativity", self.attribute_assortativity),
        ]

        plt.figure(figsize=(20, 20))
        for position, (title, values) in enumerate(panels, start=1):
            plt.subplot(4, 4, position)
            finite_values = [value for value in values if np.isfinite(value)]
            plt.hist(finite_values)
            plt.title(title)
        plt.show()