# Project: Node Prediction for OGB-Arxiv using Curvature Graph Neural Networks

**CS224W: Machine Learning with Graphs**


_Stanford University. Winter, 2021._

---

**Team Members:** Gongqi Li, Khushal Sethi, Prathyusha Burugupalli

---
This Colab notebook computes Forman-Ricci curvature for every edge of the ogbn-arxiv dataset and saves the result to a file.

## Environment Setup

In [None]:
!pip install -q torch-scatter -f https://pytorch-geometric.com/whl/torch-1.8.0+cu101.html
!pip install -q torch-sparse -f https://pytorch-geometric.com/whl/torch-1.8.0+cu101.html
!pip install -q torch-geometric
!pip install ujson
!pip install ogb

[K     |████████████████████████████████| 2.6MB 14.1MB/s 
[K     |████████████████████████████████| 1.5MB 14.3MB/s 
[K     |████████████████████████████████| 194kB 14.5MB/s 
[K     |████████████████████████████████| 235kB 27.3MB/s 
[K     |████████████████████████████████| 2.2MB 29.8MB/s 
[K     |████████████████████████████████| 51kB 5.1MB/s 
[?25h  Building wheel for torch-geometric (setup.py) ... [?25l[?25hdone
Collecting ujson
[?25l  Downloading https://files.pythonhosted.org/packages/17/4e/50e8e4cf5f00b537095711c2c86ac4d7191aed2b4fffd5a19f06898f6929/ujson-4.0.2-cp37-cp37m-manylinux1_x86_64.whl (179kB)
[K     |████████████████████████████████| 184kB 15.8MB/s 
[?25hInstalling collected packages: ujson
Successfully installed ujson-4.0.2
Collecting ogb
[?25l  Downloading https://files.pythonhosted.org/packages/34/47/16573587124ee85c8255cebd30c55981fa78c815eaff966ff111fb11c32c/ogb-1.3.0-py3-none-any.whl (67kB)
[K     |████████████████████████████████| 71kB 6.2MB/s 
Collec

In [None]:
import torch_geometric
import torch_geometric.transforms as T
from torch_geometric.utils.convert import to_networkx
from torch_geometric.utils import to_undirected
import networkx as nx
import math
import ujson
from ogb.nodeproppred import PygNodePropPredDataset

## Forman Ricci Curvature

In [None]:
class FormanRicci:
    """Compute Forman-Ricci curvature for the edges and nodes of a graph.

    Results are stored in the ``"formanCurvature"`` attribute of every edge and
    node of ``self.G``, which is a private copy of the graph passed in.
    """

    def __init__(self, G: "nx.Graph", weight="weight", method="1d"):
        """Copy the graph and fill in default weights where none are present.

        Parameters
        ----------
        G : NetworkX graph
            The graph to analyse. It is copied, so the caller's graph is left untouched.
        weight : str
            Name of the attribute that holds the weight of both edges and nodes.
            (Default value = "weight")
            If no edge carries this attribute, every edge is given weight 1.0;
            likewise, if no node carries it, every node is given weight 1.0.
        method : {"1d", "augmented"}
            The method used to compute Forman-Ricci curvature. (Default value = "1d")
            - "1d": Computed with 1-dimensional simplicial complex (vertex, edge).
            - "augmented": Computed with 2-dimensional simplicial complex, length <=3 (vertex, edge, face).
            The value is validated when `compute_ricci_curvature` is called.
        """

        self.G = G.copy()
        self.weight = weight
        self.method = method

        if not nx.get_edge_attributes(self.G, self.weight):
            print('Edge weight not detected in graph, use "weight" as default edge weight.')
            for (v1, v2) in self.G.edges():
                self.G[v1][v2][self.weight] = 1.0
        if not nx.get_node_attributes(self.G, self.weight):
            print('Node weight not detected in graph, use "weight" as default node weight.')
            for v in self.G.nodes():
                self.G.nodes[v][self.weight] = 1.0

    def compute_ricci_curvature(self):
        """Compute Forman-Ricci curvature for all edges and nodes of ``self.G``.

        Every edge receives a ``"formanCurvature"`` attribute computed with the
        selected method. Every node then receives a ``"formanCurvature"``
        attribute equal to the sum of the curvature of the edges to its
        neighbors divided by its degree (0 for an isolated node).

        Progress is printed every 5000 edges.

        Returns
        -------
        None
            The graph is updated in place; read the result from ``self.G``.

        Raises
        ------
        ValueError
            If ``self.method`` is neither "1d" nor "augmented".

        Examples
        --------
        For the path graph 0 - 1 - 2 with the default "1d" method and unit
        weights (printed progress omitted):
            >>> G = nx.path_graph(3)
            >>> frc = FormanRicci(G)
            >>> frc.compute_ricci_curvature()
            >>> frc.G[0][1]
            {'weight': 1.0, 'formanCurvature': 1.0}
        """

        # Validate up front so that an unsupported method fails loudly instead of
        # silently assigning curvature 0 to every node.
        if self.method == "1d":
            edge_curvature = self._edge_curvature_1d
        elif self.method == "augmented":
            edge_curvature = self._edge_curvature_augmented
        else:
            raise ValueError('Method %s not available. Support methods: {"1d","augmented"}' % self.method)

        # Edge Forman curvature
        num_edges = self.G.number_of_edges()
        for i, (v1, v2) in enumerate(self.G.edges()):
            self.G[v1][v2]["formanCurvature"] = edge_curvature(v1, v2)
            if i % 5000 == 0:
                print('Edges: {} / {}'.format(i, num_edges))

        # Node Forman curvature
        # NOTE(review): on a directed graph `neighbors` yields successors only while
        # `degree` counts in- and out-edges, so the value is not a plain average there.
        for n in self.G.nodes():
            degree = self.G.degree(n)
            if degree != 0:
                # sum of the curvature of the edges leading to each neighbor
                fcsum = sum(self.G[n][nbr]["formanCurvature"] for nbr in self.G.neighbors(n))
                self.G.nodes[n]["formanCurvature"] = fcsum / degree
            else:
                self.G.nodes[n]["formanCurvature"] = 0

        print("Forman curvature (%s) computation done." % self.method)

    def _other_neighbors(self, v1, v2):
        """Return the neighbor sets of v1 and v2 with the opposite endpoint removed.

        Raises KeyError if v2 is not listed as a neighbor of v1 or vice versa
        (e.g. a directed edge without its reverse edge).
        """
        v1_nbr = set(self.G.neighbors(v1))
        v1_nbr.remove(v2)
        v2_nbr = set(self.G.neighbors(v2))
        v2_nbr.remove(v1)
        return v1_nbr, v2_nbr

    def _edge_curvature_1d(self, v1, v2):
        """Return the "1d" (vertices and edges only) Forman curvature of edge (v1, v2)."""
        v1_nbr, v2_nbr = self._other_neighbors(v1, v2)

        w_e = self.G[v1][v2][self.weight]
        w_v1 = self.G.nodes[v1][self.weight]
        w_v2 = self.G.nodes[v2][self.weight]

        # Contribution of the other edges incident to each endpoint.
        ev1_sum = sum(w_v1 / math.sqrt(w_e * self.G[v1][v][self.weight]) for v in v1_nbr)
        ev2_sum = sum(w_v2 / math.sqrt(w_e * self.G[v2][v][self.weight]) for v in v2_nbr)

        return w_e * (w_v1 / w_e + w_v2 / w_e - (ev1_sum + ev2_sum))

    def _edge_curvature_augmented(self, v1, v2):
        """Return the "augmented" (triangles counted as faces) Forman curvature of edge (v1, v2)."""
        v1_nbr, v2_nbr = self._other_neighbors(v1, v2)

        # Each common neighbor closes a triangle (face) over the edge.
        face = v1_nbr & v2_nbr

        w_e = self.G[v1][v2][self.weight]
        w_f = 1  # Assume all face have weight 1
        w_v1 = self.G.nodes[v1][self.weight]
        w_v2 = self.G.nodes[v2][self.weight]

        sum_ef = sum(w_e / w_f for _ in face)
        sum_ve = w_v1 / w_e + w_v2 / w_e

        # Only edges that do not share a face with (v1, v2) contribute here; the
        # face-sharing term is always 0 when faces are limited to triangles.
        sum_veeh = sum([w_v1 / math.sqrt(w_e * self.G[v1][v][self.weight]) for v in (v1_nbr - face)] +
                       [w_v2 / math.sqrt(w_e * self.G[v2][v][self.weight]) for v in (v2_nbr - face)])

        return w_e * (sum_ef + sum_ve - math.fabs(sum_veeh))

## Compute Curvature for Ogb-Arxiv

In [None]:
# Load ogbn-arxiv and take its first graph.
dataset = PygNodePropPredDataset(name='ogbn-arxiv')
data = dataset[0]

# Add the reverse of every citation edge. `num_nodes` is passed by keyword
# because the second positional parameter of `to_undirected` differs between
# PyG releases (it is `edge_attr` in newer ones).
data.edge_index = to_undirected(data.edge_index, num_nodes=data.num_nodes)

# NOTE(review): `to_networkx` appears to build a directed graph by default, so
# each undirected edge is visited once per direction (which would explain the
# ~2.3M edges in the progress log) - confirm against the PyG docs.
G = to_networkx(data)

# Compute the augmented Forman curvature and keep a copy of the annotated graph.
frc = FormanRicci(G, method="augmented")
frc.compute_ricci_curvature()
G_frc = frc.G.copy()

Edge weight not detected in graph, use "weight" as default edge weight.
Node weight not detected in graph, use "weight" as default node weight.
Edges: 0 / 2315598
Edges: 5000 / 2315598
Edges: 10000 / 2315598
Edges: 15000 / 2315598
Edges: 20000 / 2315598
Edges: 25000 / 2315598
Edges: 30000 / 2315598
Edges: 35000 / 2315598
Edges: 40000 / 2315598
Edges: 45000 / 2315598
Edges: 50000 / 2315598
Edges: 55000 / 2315598
Edges: 60000 / 2315598
Edges: 65000 / 2315598
Edges: 70000 / 2315598
Edges: 75000 / 2315598
Edges: 80000 / 2315598
Edges: 85000 / 2315598
Edges: 90000 / 2315598
Edges: 95000 / 2315598
Edges: 100000 / 2315598
Edges: 105000 / 2315598
Edges: 110000 / 2315598
Edges: 115000 / 2315598
Edges: 120000 / 2315598
Edges: 125000 / 2315598
Edges: 130000 / 2315598
Edges: 135000 / 2315598
Edges: 140000 / 2315598
Edges: 145000 / 2315598
Edges: 150000 / 2315598
Edges: 155000 / 2315598
Edges: 160000 / 2315598
Edges: 165000 / 2315598
Edges: 170000 / 2315598
Edges: 175000 / 2315598
Edges: 180000 / 2

## Save the curvature info

In [None]:
# Gather the per-edge curvature values, keyed by edge.
forman_curvatures = nx.get_edge_attributes(G_frc, "formanCurvature")

# Serialise the whole mapping with ujson and write it out in a single call.
with open('curvature.txt', 'w') as fh:
    fh.write(ujson.dumps(forman_curvatures))