# GAT
GAT baselines
* GATNode for node classifcation
* GATGraph for graph classifcation

#### Set Up

In [1]:
import sys
import os

# Add the absolute path to the src directory 
os.chdir('../src')
src_path = os.path.abspath('../src/')
sys.path.insert(0, src_path)

from setup import *

In [2]:
# import 3 datasets
imdb_dataset, cora_dataset, enzyme_dataset = load_data()

# global variables
EPOCHS = 500 # number of epochs to run while training

## Node classifcation: CORA

In [3]:
# init model
hidden_channels = 32 
layers = 2
cora_mdl = GATNode(cora_dataset.num_features, 
                       hidden_channels, 
                       cora_dataset.num_classes, 16, layers)
optimizer = torch.optim.Adam(cora_mdl.parameters(), 
                             lr=0.005, 
                             weight_decay=5e-4)

# train and test model
for _ in range(EPOCHS):
    cora_mdl = node_train(cora_mdl, cora_dataset, optimizer)

# calculate accuracy
cora_test_acc, cora_train_acc, pred = node_test(cora_mdl, cora_dataset)
print(f'CORA Test Acc: {cora_test_acc:.4f}')
print(f'CORA Train Acc: {cora_train_acc:.4f}')

CORA Test Acc: 0.7510
CORA Train Acc: 1.0000


## Graph classifcation: IMDB

In [4]:
# set up dataset into train/test loaders
train_loader, test_loader = preprocess_data(imdb_dataset, 
                                                         onehot=True, 
                                                         batch_size=64)

In [5]:
# init model
hidden_channels = 15
layers = 2
heads = 5
imdb_mdl = GATGraph(imdb_dataset.num_features, 
                        hidden_channels, 
                        imdb_dataset.num_classes, heads, layers)
optimizer = torch.optim.Adam(imdb_mdl.parameters(), lr=0.01)
# train model
for _ in range(EPOCHS):
    graph_train(imdb_mdl, train_loader, optimizer)
# test and trian accuracy
test_acc, train_acc = graph_test(imdb_mdl, test_loader, train_loader)
print(f"Final TEST Accuracy on ENZYME: {test_acc:.4f}")
print(f"Final TRAIN Accuracy on ENZYME: {train_acc:.4f}")

Final TEST Accuracy on ENZYME: 0.7150
Final TRAIN Accuracy on ENZYME: 0.7638


## Graph classification: ENZYME

In [6]:
# set up dataset
train_loader, test_loader = preprocess_data(enzyme_dataset, 
                                                         onehot=False, 
                                                         batch_size=64)

In [7]:
# init model
hidden_channels = 20
layers = 4
heads = 5
enzyme_mdl = GATGraph(enzyme_dataset.num_features, 
                          hidden_channels, 
                          enzyme_dataset.num_classes,
                        heads, layers)
optimizer = torch.optim.Adam(enzyme_mdl.parameters(), lr=0.01,
                             weight_decay=5e-4)



# train model
for epoch in range(EPOCHS):
    graph_train(enzyme_mdl, train_loader, optimizer)

# get test and train acc
test_acc, train_acc = graph_test(enzyme_mdl, test_loader, train_loader)
print(f"Final TEST Accuracy on ENZYME: {test_acc:.4f}")
print(f"Final TRAIN Accuracy on ENZYME: {train_acc:.4f}")

Final TEST Accuracy on ENZYME: 0.1667
Final TRAIN Accuracy on ENZYME: 0.1667
