In [None]:
# imports 
import torch
def format_pytorch_version(version):
    """Return the part of a torch version string that precedes the first '+'.

    Example: ``'1.13.0+cu116'`` -> ``'1.13.0'``. A string without '+' is
    returned unchanged.
    """
    release, _plus, _local = version.partition('+')
    return release
def format_cuda_version(version):
    """Turn a CUDA version string into the tag used in the wheel-index URL.

    Args:
        version: CUDA version such as ``'11.6'``, or ``None`` when torch was
            built without CUDA (``torch.version.cuda`` is ``None`` then).

    Returns:
        ``'cu'`` followed by the version with dots removed (``'11.6'`` ->
        ``'cu116'``), or ``'cpu'`` when ``version`` is ``None``.
    """
    # CPU-only torch builds report no CUDA version; the wheel index publishes
    # those builds under the 'cpu' tag instead of a 'cuXYZ' tag.
    if version is None:
        return 'cpu'
    return 'cu' + version.replace('.', '')

# Build the {TORCH} and {CUDA} tags from the installed torch so the find-links
# URLs below point at the wheel index for this exact torch/CUDA combination.
TORCH_version = torch.__version__
TORCH = format_pytorch_version(TORCH_version)
CUDA_version = torch.version.cuda
CUDA = format_cuda_version(CUDA_version)
# IPython substitutes {TORCH} and {CUDA} into each shell command before running it.
# NOTE(review): none of the packages below are version-pinned, so a fresh run
# may resolve to different releases than the ones this notebook was written against.
!pip install torch-scatter -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
!pip install torch-sparse -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
!pip install torch-cluster -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
!pip install torch-spline-conv -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
!pip install torch-geometric
# Graph tooling (igraph / leidenalg / cairocffi) and DGL, which are imported further down.
!pip install python-igraph leidenalg cairocffi
!pip install dgl

from torch_geometric.datasets import Planetoid
from torch_geometric.loader import DataLoader
import torch_geometric.utils as U
import torch.nn.functional as F
from torch.nn import Linear, Dropout
from torch_geometric.nn import GCNConv

from torch import nn
from torch.optim import Adam
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
from torch.utils.tensorboard import SummaryWriter
import time
import igraph as ig

import dgl.nn as dglnn
from dgl import AddSelfLoop
from dgl.data import CiteseerGraphDataset, CoraGraphDataset, PubmedGraphDataset
import pdb
from dgl.nn.pytorch import GATConv

In [9]:
# constants
# Dropout probability; the model classes in this notebook pass it as both
# feat_drop and attn_drop to every GATv2Conv layer they build.
DROPOUT_RATE=0.0
# Presumably the number of training epochs per run — not used in the visible
# cells; confirm against the training loop.
N_EPOCHS=200
# Presumably how many independent runs are repeated — confirm against the
# experiment loop.
REPEATS=100
# Presumably the per-head hidden width passed as hid_size — TODO confirm.
HIDDEN_UNITS=8
# Presumably passed as the `heads` argument of the models, which read heads[0]
# for the input/hidden layers and heads[1] for the output layer — TODO confirm.
HEADS=[8,1]

# hyperparameters
class lambda_value:
    """Small mutable box around a single lambda hyperparameter.

    The value can be swapped at runtime through ``update_lambda`` so that code
    holding a reference to the box sees the new value on its next read.
    """

    def __init__(self, lambda_p):
        super().__init__()
        self.lambda_p = lambda_p

    def update_lambda(self, l):
        """Overwrite the stored lambda with ``l``."""
        self.lambda_p = l

    def get_lambda(self):
        """Return the lambda currently stored in the box."""
        return self.lambda_p


# Module-level box; backward_reg reads it through get_lambda() each time it
# samples a scaling factor.
latest_lambda = lambda_value(1.01)


In [10]:
# 1 - BASE GAT MODEL 
class GATV2(nn.Module):
    """Baseline multi-layer GATv2 network built from DGL ``GATv2Conv`` layers.

    Layout of ``self.gat_layers``:
      * one input layer  ``in_size -> hid_size`` with ``heads[0]`` heads (ELU),
      * ``num_layers - 2`` hidden layers ``hid_size * heads[0] -> hid_size``
        with ``heads[0]`` heads (ELU),
      * one output layer ``hid_size * heads[0] -> out_size`` with ``heads[1]``
        heads and no activation.

    An input and an output layer are always created, so the stack never has
    fewer than two layers. Every layer uses ``DROPOUT_RATE`` for both feature
    and attention dropout.
    """

    def __init__(self, in_size, hid_size, out_size, heads, num_layers=2):
        super().__init__()
        self.gat_layers = nn.ModuleList()

        def make_conv(n_in, n_out, n_heads, act):
            # All layers share the same dropout settings; only sizes, head
            # count and activation vary.
            return dglnn.GATv2Conv(
                n_in,
                n_out,
                n_heads,
                feat_drop=DROPOUT_RATE,
                attn_drop=DROPOUT_RATE,
                activation=act,
            )

        # Input layer.
        self.gat_layers.append(make_conv(in_size, hid_size, heads[0], F.elu))

        # Optional hidden layers; their input is the flattened multi-head output.
        for _ in range(num_layers - 2):
            self.gat_layers.append(
                make_conv(hid_size * heads[0], hid_size, heads[0], F.elu)
            )

        # Output layer.
        self.gat_layers.append(
            make_conv(hid_size * heads[0], out_size, heads[1], None)
        )

    def forward(self, g, inputs):
        """Run ``inputs`` through every layer on graph ``g``.

        Intermediate layer outputs are flattened from dimension 1 onward (heads
        concatenated); the last layer's output is averaged over dimension 1.
        """
        final_idx = len(self.gat_layers) - 1
        h = inputs
        for idx, conv in enumerate(self.gat_layers):
            out = conv(g, h)
            h = out.mean(1) if idx == final_idx else out.flatten(1)
        return h

In [11]:
# 2 - FORWARD FEATURE SCALING
class GATReg(nn.Module):
    """GATv2 network whose output rows are multiplied by random scaling factors.

    The layer stack is: an input layer ``in_size -> hid_size`` (``heads[0]``
    heads, ELU), ``num_layers - 2`` hidden layers, and an output layer
    ``hid_size * heads[0] -> out_size`` (``heads[1]`` heads, no activation).
    All layers use ``DROPOUT_RATE`` for feature and attention dropout.

    On every forward call one factor per input row is drawn from
    ``Beta(lambda_parameter, lambda_parameter) + 0.1``; factors above 1 are
    remapped to ``1 / (2 - factor)``. The final output is multiplied row-wise
    by these factors.

    Args:
        in_size: input feature size.
        hid_size: per-head hidden feature size.
        out_size: output feature size.
        heads: sequence; ``heads[0]`` is used for input/hidden layers and
            ``heads[1]`` for the output layer.
        lambda_parameter: both concentration parameters of the Beta
            distribution.
        num_layers: total layer count; values below 2 still build 2 layers.
    """

    def __init__(self, in_size, hid_size, out_size, heads, lambda_parameter=1.01, num_layers=2):
        super().__init__()
        self.gat_layers = nn.ModuleList()
        self.lambda_parameter = lambda_parameter

        # Input layer.
        self.gat_layers.append(
            dglnn.GATv2Conv(
                in_size,
                hid_size,
                heads[0],
                feat_drop=DROPOUT_RATE,
                attn_drop=DROPOUT_RATE,
                activation=F.elu,
            )
        )

        # Optional hidden layers.
        for _ in range(num_layers - 2):
            self.gat_layers.append(
                dglnn.GATv2Conv(
                    hid_size * heads[0],
                    hid_size,
                    heads[0],
                    feat_drop=DROPOUT_RATE,
                    attn_drop=DROPOUT_RATE,
                    activation=F.elu,
                )
            )

        # Output layer.
        self.gat_layers.append(
            dglnn.GATv2Conv(
                hid_size * heads[0],
                out_size,
                heads[1],
                feat_drop=DROPOUT_RATE,
                attn_drop=DROPOUT_RATE,
                activation=None,
            )
        )

    def _sample_scalar(self, h):
        """Draw one scaling factor per row of ``h``, shaped ``(h.shape[0], 1)``.

        The sample is created on ``h.device`` so the later multiplication does
        not mix devices when the model runs on an accelerator.
        """
        concentration = torch.tensor([self.lambda_parameter], device=h.device)
        beta = torch.distributions.beta.Beta(concentration, concentration)
        scalar = beta.sample(h.shape[:1]) + 0.1
        # Remap factors above 1 to 1 / (2 - factor).
        above_one = scalar > 1
        scalar[above_one] = 1.0 / (2.0 - scalar[above_one])
        return scalar

    def forward(self, g, inputs):
        """Apply all layers on graph ``g`` and scale the result row-wise.

        NOTE(review): the scaling is applied regardless of ``self.training``;
        confirm whether evaluation is meant to be scaled as well.
        """
        h = inputs
        # Drawn before the layers run so random-number consumption order is
        # the same as it has always been.
        scalar = self._sample_scalar(h)

        last = len(self.gat_layers) - 1
        for i, layer in enumerate(self.gat_layers):
            h = layer(g, h)
            if i == last:
                h = h.mean(1)
            else:
                h = h.flatten(1)

        return scalar * h

In [12]:
# 3,4 - BACKWARD GRADIENTS SCALING + COMBINED SCALING
class backward_reg(torch.autograd.Function):
    """Autograd function that randomly rescales activations and gradients.

    Forward: when ``training`` is truthy, each row of ``x`` is multiplied by
    its own random factor; otherwise ``x`` is returned unchanged.
    Backward: each row of the incoming gradient is multiplied by a freshly
    drawn random factor (independent of the forward draw).

    Factors are drawn from ``Beta(l, l) + 0.1`` with ``l`` read from the
    module-level ``latest_lambda`` box at call time; factors above 1 are
    remapped to ``1 / (2 - factor)``.

    Usage: ``backward_reg.apply(x, training)``.
    """

    @staticmethod
    def _sample_scale(sample_shape, device):
        """Return random factors of shape ``sample_shape + (1,)`` on ``device``."""
        lambda_p = latest_lambda.get_lambda()
        concentration = torch.tensor([lambda_p], device=device)
        beta = torch.distributions.beta.Beta(concentration, concentration)
        scale = beta.sample(sample_shape) + 0.1
        above_one = scale > 1
        scale[above_one] = 1.0 / (2.0 - scale[above_one])
        return scale

    @staticmethod
    def forward(ctx, x, training):
        if training:
            # Sample on x's device so the product never mixes devices.
            x = backward_reg._sample_scale(x.shape[:1], x.device) * x

        return x

    @staticmethod
    def backward(ctx, gradients):
        scale = backward_reg._sample_scale(gradients.shape[:1], gradients.device)
        # One return value per forward input. `training` is not a tensor, and
        # autograd only accepts None as the gradient for a non-tensor input.
        return scale * gradients, None

class GATReg2(nn.Module):
    """GATv2 network with an optional row-wise random scaling of its output.

    The layer stack is: an input layer ``in_size -> hid_size`` (``heads[0]``
    heads, ELU), ``num_layers - 2`` hidden layers, and an output layer
    ``hid_size * heads[0] -> out_size`` (``heads[1]`` heads, no activation).
    All layers use ``DROPOUT_RATE`` for feature and attention dropout.

    Args:
        in_size: input feature size.
        hid_size: per-head hidden feature size.
        out_size: output feature size.
        heads: sequence; ``heads[0]`` is used for input/hidden layers and
            ``heads[1]`` for the output layer.
        lambda_parameter: both concentration parameters of the Beta
            distribution used when ``forward_scaling`` is enabled.
        num_layers: total layer count; values below 2 still build 2 layers.
        forward_scaling: when True, multiply each output row by a factor drawn
            from ``Beta(lambda_parameter, lambda_parameter) + 0.1`` (factors
            above 1 remapped to ``1 / (2 - factor)``). Defaults to False, in
            which case the output is returned unscaled and no random sample
            is drawn.

    NOTE(review): presumably meant to be combined with a gradient-scaling
    function applied outside this class — confirm against the training code.
    """

    def __init__(self, in_size, hid_size, out_size, heads, lambda_parameter=0.05, num_layers=2,
                 forward_scaling=False):
        super().__init__()
        self.gat_layers = nn.ModuleList()
        self.lambda_parameter = lambda_parameter
        self.forward_scaling = forward_scaling

        # Input layer.
        self.gat_layers.append(
            dglnn.GATv2Conv(
                in_size,
                hid_size,
                heads[0],
                feat_drop=DROPOUT_RATE,
                attn_drop=DROPOUT_RATE,
                activation=F.elu,
            )
        )

        # Optional hidden layers.
        for _ in range(num_layers - 2):
            self.gat_layers.append(
                dglnn.GATv2Conv(
                    hid_size * heads[0],
                    hid_size,
                    heads[0],
                    feat_drop=DROPOUT_RATE,
                    attn_drop=DROPOUT_RATE,
                    activation=F.elu,
                )
            )

        # Output layer.
        self.gat_layers.append(
            dglnn.GATv2Conv(
                hid_size * heads[0],
                out_size,
                heads[1],
                feat_drop=DROPOUT_RATE,
                attn_drop=DROPOUT_RATE,
                activation=None,
            )
        )

    def _sample_scalar(self, h):
        """Draw one scaling factor per row of ``h`` on ``h.device``."""
        concentration = torch.tensor([self.lambda_parameter], device=h.device)
        beta = torch.distributions.beta.Beta(concentration, concentration)
        scalar = beta.sample(h.shape[:1]) + 0.1
        above_one = scalar > 1
        scalar[above_one] = 1.0 / (2.0 - scalar[above_one])
        return scalar

    def forward(self, g, inputs):
        """Apply all layers on graph ``g``; scale the result if enabled."""
        h = inputs
        # Only sample when the factors are actually used; an unused draw just
        # costs time and advances the global random state for no effect.
        scalar = self._sample_scalar(h) if self.forward_scaling else None

        last = len(self.gat_layers) - 1
        for i, layer in enumerate(self.gat_layers):
            h = layer(g, h)
            if i == last:
                h = h.mean(1)
            else:
                h = h.flatten(1)

        if scalar is not None:
            h = h * scalar
        return h