<a href="https://colab.research.google.com/github/96jonesa/CSE-517-Project/blob/main/scaffolding.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Imports

In [None]:
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

#GRU

In [None]:
class GRU(nn.Module):
    """Thin wrapper around a single ``nn.GRU`` layer.

    Args:
        input_size: forwarded to ``nn.GRU`` as its input feature size.
        hidden_size: forwarded to ``nn.GRU`` as its hidden state size.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.input_size, self.hidden_size = input_size, hidden_size
        self.gru = nn.GRU(input_size, hidden_size)

    def forward(self, input, h_0):
        """Run the wrapped GRU and return its ``(output, h_n)`` pair."""
        sequence_output, final_hidden = self.gru(input, h_0)
        return sequence_output, final_hidden

#Self-Attention

In [None]:
# attention weights are softmax(left^T W right), where W is a learned parameter matrix

class BilinearAttention(nn.Module):
    """Bilinear attention: weights are ``softmax(left^T W right)``.

    ``W`` is the learned weight of an ``nn.Bilinear`` layer without bias.

    Args:
        left_size: feature size of the ``left`` input.
        right_size: feature size of the ``right`` input.
        weights_size: output size of the bilinear layer, i.e. the number of
            attention scores produced per input row.
    """

    def __init__(self, left_size, right_size, weights_size):
        super(BilinearAttention, self).__init__()
        self.left_size = left_size
        self.right_size = right_size
        self.weights_size = weights_size

        self.bilinear = nn.Bilinear(self.left_size, self.right_size, weights_size, bias=False)
        # Normalise over the last dimension explicitly. nn.Softmax() without
        # `dim` is deprecated and warns on every call; for the 2-D scores that
        # torch.mm below requires, the last dimension is the one PyTorch
        # previously picked implicitly, so results are unchanged.
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, left, right):
        """Return ``(attention_weights, output_features)``.

        ``attention_weights`` is the softmax of the bilinear scores and
        ``output_features`` is ``attention_weights @ left``.
        """
        attention_weights = self.softmax(self.bilinear(left, right))
        # NOTE(review): torch.mm only accepts 2-D operands, and this product is
        # only defined when weights_size == left.shape[0]. Confirm that this is
        # the intended aggregation.
        output_features = torch.mm(attention_weights, left)

        return attention_weights, output_features

In [None]:
# attention weights are softmax(u^T tanh(W input + b)), where W is a learned parameter matrix, u is a learned parameter vector, and b is a learned offset

class LinearAttention(nn.Module):
    """Additive attention: weights are ``softmax(u^T tanh(W input + b))``.

    ``W`` and ``b`` belong to ``linear_1``; ``u`` is the bias-free
    ``linear_2``.

    Args:
        input_size: feature size of the input.
        intermediate_size: output size of ``linear_1``.
        weights_size: output size of ``linear_2``, i.e. the number of
            attention scores produced per input row.
    """

    def __init__(self, input_size, intermediate_size, weights_size):
        super(LinearAttention, self).__init__()
        self.input_size = input_size
        self.intermediate_size = intermediate_size
        self.weights_size = weights_size

        self.linear_1 = nn.Linear(self.input_size, self.intermediate_size, bias=True)
        self.linear_2 = nn.Linear(self.intermediate_size, self.weights_size, bias=False)
        self.tanh = nn.Tanh()
        # Normalise over the last dimension explicitly. nn.Softmax() without
        # `dim` is deprecated and warns on every call; for the 2-D scores that
        # torch.mm below requires, the last dimension is the one PyTorch
        # previously picked implicitly, so results are unchanged.
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, input):
        """Return ``(attention_weights, output_features)``.

        ``attention_weights`` is the softmax of the projected scores and
        ``output_features`` is ``attention_weights @ input``.
        """
        intermediate = self.tanh(self.linear_1(input))
        attention_weights = self.softmax(self.linear_2(intermediate))
        # NOTE(review): torch.mm only accepts 2-D operands, and this product is
        # only defined when weights_size == input.shape[0]. Confirm that this
        # is the intended aggregation.
        output_features = torch.mm(attention_weights, input)

        return attention_weights, output_features

#Blending

In [None]:
# output is ReLU(left^T W right + b) where W is a learned parameter matrix
# and b is a learned bias

class Blend(nn.Module):
    """Combine two inputs as ``ReLU(left^T W right + b)``.

    ``W`` and ``b`` are the weight and bias of an ``nn.Bilinear`` layer.

    Args:
        left_size: feature size of the ``left`` input.
        right_size: feature size of the ``right`` input.
        output_size: feature size of the blended output.
    """

    def __init__(self, left_size, right_size, output_size):
        super().__init__()
        self.left_size, self.right_size = left_size, right_size
        self.output_size = output_size

        self.bilinear = nn.Bilinear(left_size, right_size, output_size, bias=True)
        self.relu = nn.ReLU()

    def forward(self, left, right):
        """Return the rectified bilinear combination of ``left`` and ``right``."""
        blended = self.bilinear(left, right)
        return self.relu(blended)

#Single-Headed Graph Attention Network (SGAT)

In [None]:
# need shared learned parameter matrix W to multiply against each input vector

class SharedLinear(nn.Module):
    """Bias-free linear map that applies one learned matrix to every input row.

    Args:
        input_size: feature size of the input.
        output_size: feature size of the projected output.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        self.input_size, self.output_size = input_size, output_size
        self.linear = nn.Linear(input_size, output_size, bias=False)

    def forward(self, input):
        """Return ``input`` projected by the shared weight matrix."""
        return self.linear(input)

In [None]:
# merge code with MGAT code to form general case GAT code

class SGAT(nn.Module):
    """Single-headed graph attention.

    Attention weights are
    ``softmax(LeakyReLU(W [input ; neighborhood]))``, where ``W`` is a learned
    bias-free linear map over the concatenated features.

    Args:
        input_size: feature size of ``input`` and of ``neighborhood``; the
            linear layer consumes ``2 * input_size`` features.
        weights_size: output size of the linear layer, i.e. the number of
            attention scores produced per row.
        leakyrelu_slope: negative slope passed to ``nn.LeakyReLU``.
    """

    def __init__(self, input_size, weights_size, leakyrelu_slope):
        super(SGAT, self).__init__()
        self.input_size = input_size
        self.weights_size = weights_size
        self.leakyrelu_slope = leakyrelu_slope

        self.linear = nn.Linear(2 * input_size, weights_size, bias=False)
        self.leakyrelu = nn.LeakyReLU(self.leakyrelu_slope)
        # Explicit dim: nn.Softmax() without `dim` is deprecated and warns on
        # every call. The last dimension is the one PyTorch picked implicitly
        # for the 2-D scores that torch.mm below requires.
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, input, neighborhood):
        """Return ``(attention_weights, output_features)``.

        ``output_features`` is ``attention_weights @ input``. Both values are
        returned, matching the other attention modules in this file and the
        two-value unpacking done by ``MGAT.forward``.
        """
        # torch.cat takes a sequence of tensors. Concatenate along the feature
        # (last) axis so the result has the 2 * input_size features that
        # self.linear was built for.
        combined = torch.cat((input, neighborhood), dim=-1)
        attention_weights = self.softmax(self.leakyrelu(self.linear(combined)))
        # NOTE(review): torch.mm only accepts 2-D operands, and this product is
        # only defined when weights_size == input.shape[0]. Confirm that this
        # is the intended aggregation.
        output_features = torch.mm(attention_weights, input)

        return attention_weights, output_features

#Multi-Headed Graph Attention Network (MGAT)

In [None]:
# decide between implementing like this and implementing in main module

class MGAT(nn.Module):
    """Multi-headed graph attention built by running an SGAT once per head.

    Args:
        input_size: forwarded to ``SGAT``.
        weights_size: forwarded to ``SGAT``.
        leakyrelu_slope: forwarded to ``SGAT``.
    """

    def __init__(self, input_size, weights_size, leakyrelu_slope):
        super(MGAT, self).__init__()
        self.input_size = input_size
        self.weights_size = weights_size
        self.leakyrelu_slope = leakyrelu_slope

        self.sgat = SGAT(self.input_size, self.weights_size, self.leakyrelu_slope)

    def forward(self, input, neighborhood, num_heads):
        """Run the attention head ``num_heads`` times and stack the results.

        Expects ``self.sgat(input, neighborhood)`` to return an
        ``(attention_weights, output_features)`` pair.

        Returns:
            ``(attention_weights, output_features)``, each with a new leading
            dimension of length ``num_heads`` indexing the head.

        Raises:
            ValueError: if ``num_heads`` is less than 1.
        """
        if num_heads < 1:
            raise ValueError("num_heads must be at least 1")

        # NOTE(review): every head calls the same SGAT instance, so all heads
        # share parameters. Distinct heads would need one SGAT per head
        # (e.g. an nn.ModuleList), which requires knowing num_heads at
        # construction time.
        per_head = [self.sgat(input, neighborhood) for _ in range(num_heads)]
        head_weights, head_features = zip(*per_head)

        attention_weights = torch.stack(head_weights)
        output_features = torch.stack(head_features)

        return attention_weights, output_features

#Scaffolding

In [None]:
class MANSF(nn.Module):
    def __init__(self, input_size, output_size, leakyrelu_slope, elu_alpha):
        """Store the hyperparameters and create the model's sub-modules.

        NOTE(review): this constructor is unfinished scaffolding and raises as
        written; see the comment above the sub-module construction.

        Args:
            input_size: stored on the instance; not yet passed to any
                sub-module.
            output_size: stored on the instance; not yet passed to any
                sub-module.
            leakyrelu_slope: stored on the instance; not used by any
                sub-module created here.
            elu_alpha: stored on the instance and passed to ``nn.ELU``.
        """
        super(MANSF, self).__init__()
        self.input_size = input_size
        self.output_size = output_size
        self.leakyrelu_slope = leakyrelu_slope
        self.elu_alpha = elu_alpha

        # TODO: fill in the constructor arguments. As written these calls fail:
        #   - GRU.__init__ requires (input_size, hidden_size);
        #   - SelfAttention is not defined in the visible part of this file
        #     (only BilinearAttention and LinearAttention are);
        #   - Blend.__init__ requires (left_size, right_size, output_size).
        self.gru_p = GRU()
        self.gru_m = GRU()
        self.gru_s = GRU()
        self.attn_p = SelfAttention()
        self.attn_m = SelfAttention()
        self.attn_s = SelfAttention()
        self.blend = Blend()
        self.elu = nn.ELU(elu_alpha)
        self.sigmoid = nn.Sigmoid()

    def forward(self, input, neighborhoods, num_heads):
