# GAT
GAT baselines
* GATNode for node classification
* GATGraph for graph classification

#### Set Up

In [1]:
import sys
import os
import random


# Resolve the src directory once, *before* changing into it, so the path put
# on sys.path does not depend on whatever working directory chdir leaves us in.
src_path = os.path.abspath('../src')
os.chdir(src_path)
sys.path.insert(0, src_path)

# The names torch / np and the project helpers used by the cells below are
# expected to come from this star import.
from setup import *

# Seed every RNG used in this notebook for reproducibility.
seed = 123
torch.manual_seed(seed)
random.seed(seed)
np.random.seed(seed)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# load_data() yields three datasets, unpacked here as IMDB, CORA and ENZYME
(
    imdb_dataset,
    cora_dataset,
    enzyme_dataset,
) = load_data()

## Node classification: CORA

In [None]:
# Hyperparameters for the CORA node-classification baseline.
hidden_channels = 32
# NOTE(review): this was an unnamed literal 16 in the GATNode call; presumably
# the number of attention heads, mirroring the (heads, layers) arguments passed
# to GATGraph in the graph-classification cells -- confirm against GATNode.
heads = 16
layers = 2
num_epochs = 100

# init model
cora_mdl = GATNode(
    cora_dataset.num_features,
    hidden_channels,
    cora_dataset.num_classes,
    heads,
    layers,
)
optimizer = torch.optim.Adam(
    cora_mdl.parameters(),
    lr=0.005,
    weight_decay=5e-4,
)

# train model; node_train hands the model back after each pass
for _ in range(num_epochs):
    cora_mdl = node_train(cora_mdl, cora_dataset, optimizer)

# calculate accuracy (node_test also returns the predictions as `pred`)
cora_test_acc, cora_train_acc, pred = node_test(cora_mdl, cora_dataset)
print(f'CORA Test Acc: {cora_test_acc:.4f}')
print(f'CORA Train Acc: {cora_train_acc:.4f}')

CORA Test Acc: 0.8010
CORA Train Acc: 1.0000


## Graph classification: IMDB

In [27]:
# Build the IMDB train/test loaders (batch_size 64, onehot flag enabled)
train_loader, test_loader = preprocess_data(
    imdb_dataset, onehot=True, batch_size=64
)

In [28]:
# Hyperparameters for the IMDB graph-classification baseline.
hidden_channels = 15
layers = 2
heads = 5

# init model
imdb_mdl = GATGraph(
    imdb_dataset.num_features,
    hidden_channels,
    imdb_dataset.num_classes,
    heads,
    layers,
)
optimizer = torch.optim.Adam(imdb_mdl.parameters(), lr=0.01)

# train model; rebind the model from graph_train's return value, the same way
# the ENZYME cell does, instead of silently discarding it
for _ in range(100):
    imdb_mdl = graph_train(imdb_mdl, train_loader, optimizer)

# test and train accuracy
test_acc, train_acc = graph_test(imdb_mdl, test_loader, train_loader)
print(f"Final TEST Accuracy on IMDB: {test_acc:.4f}")
print(f"Final TRAIN Accuracy on IMDB: {train_acc:.4f}")

Final TEST Accuracy on IMDB: 0.6400
Final TRAIN Accuracy on IMDB: 0.7812


## Graph classification: ENZYME

In [2]:
# Build the ENZYME loaders; this rebinds the train_loader / test_loader names
# that the IMDB cell also assigns
(train_loader, test_loader) = load_enzyme()

In [17]:
# Hyperparameters for the ENZYME graph-classification baseline.
hidden_channels = 32
layers = 3
heads = 2
num_epochs = 150
log_every = 10

# init model
enzyme_mdl = GATGraph(
    enzyme_dataset.num_features,
    hidden_channels,
    enzyme_dataset.num_classes,
    heads,
    layers,
)
optimizer = torch.optim.Adam(
    enzyme_mdl.parameters(),
    lr=0.0001,
    weight_decay=1e-5,
)

# train model
for epoch in range(num_epochs):
    enzyme_mdl = graph_train(enzyme_mdl, train_loader, optimizer)
    # Evaluate only on the epochs that are actually logged; the metrics from
    # the other epochs were computed and then thrown away. The final numbers
    # are recomputed after the loop.
    if epoch % log_every == 0:
        test_acc, train_acc = graph_test(enzyme_mdl, test_loader, train_loader)
        print(f"EPOCH {epoch} : Train Acc = {train_acc:.4f} , Test Acc = {test_acc:.4f}")

# get test and train acc
test_acc, train_acc = graph_test(enzyme_mdl, test_loader, train_loader)
print(f"Final TEST Accuracy on ENZYME: {test_acc:.4f}")
print(f"Final TRAIN Accuracy on ENZYME: {train_acc:.4f}")

EPOCH 0 : Train Acc = 0.1646 , Test Acc = 0.1917
EPOCH 10 : Train Acc = 0.1750 , Test Acc = 0.1917
EPOCH 20 : Train Acc = 0.1646 , Test Acc = 0.1917
EPOCH 30 : Train Acc = 0.1771 , Test Acc = 0.2000
EPOCH 40 : Train Acc = 0.1875 , Test Acc = 0.2750
EPOCH 50 : Train Acc = 0.2042 , Test Acc = 0.2833
EPOCH 60 : Train Acc = 0.2104 , Test Acc = 0.2583
EPOCH 70 : Train Acc = 0.1938 , Test Acc = 0.3083
EPOCH 80 : Train Acc = 0.2021 , Test Acc = 0.2833
EPOCH 90 : Train Acc = 0.2021 , Test Acc = 0.2417
EPOCH 100 : Train Acc = 0.2042 , Test Acc = 0.2667
EPOCH 110 : Train Acc = 0.2042 , Test Acc = 0.3083
EPOCH 120 : Train Acc = 0.2125 , Test Acc = 0.2917
EPOCH 130 : Train Acc = 0.1958 , Test Acc = 0.2500
EPOCH 140 : Train Acc = 0.1958 , Test Acc = 0.2417
Final TEST Accuracy on ENZYME: 0.2667
Final TRAIN Accuracy on ENZYME: 0.1958
