In [1]:
import argparse
import networkx as nx
import torch
import torch.nn as nn
from torch_geometric.nn import GCNConv

### TODO:
Implement teacher network architecture and training along with flags for datasets and teacher model architecture (Chanikya and Nithin)
eg: python3 train_teacher.py --dataset=cora --model=SAGE --epochs-100 --lr=0.01 . Add flags for other hyperparameters if necessary (Chanikya and Nithin)

Other teacher model architectures - GCN, GAT, APPNP (Chanikya and Nithin + others based on availability)

In [2]:
from torch_geometric.datasets import Planetoid
dataset = Planetoid(root='./Cora', name='Cora')

data = dataset[0]
print(f'Dataset: {dataset}:')
print('======================')
print(f'Number of graphs: {len(dataset)}')
print(f'Number of features: {dataset.num_features}')
print(f'Number of classes: {dataset.num_classes}')

Dataset: Cora():
Number of graphs: 1
Number of features: 1433
Number of classes: 7


In [3]:
# data.edge_index.t()

In [4]:
def load_data(dataset):
    if dataset == "cora":
        dataset = Planetoid(root='./Cora', name='Cora')
        data = dataset[0]
        graph_nx = nx.Graph()
        graph_nx.add_edges_from(data.edge_index.t().tolist())

        # Adding self-loops
        # graph_nx.add_edges_from((n, n) for n in graph_nx.nodes())
        
        # adj_tensor = torch.tensor(nx.to_numpy_array(graph_nx), dtype=torch.float).to('cuda')
        features = data.x
        labels = data.y

        train_idx = data.train_mask.nonzero(as_tuple=True)[0]
        val_idx = data.val_mask.nonzero(as_tuple=True)[0]
        test_idx = data.test_mask.nonzero(as_tuple=True)[0]
        
        return data.edge_index, features, labels, data.train_mask, data.val_mask, data.test_mask

In [19]:
cnt = 0
140+1000+500
2708
for x in data.val_mask:
    cnt += (x==True)
print(cnt)

tensor(500)


GCN => num layers, hidden, input dim, out, dp, activa,  

In [20]:
class GCN(nn.Module):
    def __init__(
        self,
        num_layers,
        input_dim,
        hidden_dim,
        output_dim,
        dropout_ratio,
        activation,
        norm_type="none"
    ):
        super().__init__()
        self.num_layers = num_layers
        self.norm_type = norm_type
        self.dropout = nn.Dropout(dropout_ratio)
        self.activation = activation

        self.layers = nn.ModuleList()
        self.norms = nn.ModuleList()

        if num_layers == 1:
            self.layers.append(GCNConv(input_dim, output_dim))
        else:
            self.layers.append(GCNConv(input_dim, hidden_dim))
            if norm_type == "batch":
                self.norms.append(nn.BatchNorm1d(hidden_dim))
            elif norm_type == "layer":
                self.norms.append(nn.LayerNorm(hidden_dim))

            for _ in range(num_layers - 2):
                self.layers.append(GCNConv(hidden_dim, hidden_dim))
                if norm_type == "batch":
                    self.norms.append(nn.BatchNorm1d(hidden_dim))
                elif norm_type == "layer":
                    self.norms.append(nn.LayerNorm(hidden_dim))

            self.layers.append(GCNConv(hidden_dim, output_dim))

    def forward(self, x, edge_index):
        h_list = []
        h = x
        for l, layer in enumerate(self.layers):
            h = layer(h, edge_index)
            if l != self.num_layers - 1:
                if self.norm_type != "none":
                    h = self.norms[l](h)
                h = self.activation(h)
                h = self.dropout(h)
                h_list.append(h)
        return h_list[-1], h

In [21]:
model = GCN(
    num_layers=3,
    input_dim=dataset.num_node_features,
    hidden_dim=64,
    output_dim=dataset.num_classes,
    dropout_ratio=0.8,
    activation=nn.functional.relu,
    norm_type="none"
)

In [24]:
def train(model, data, edge_index, labels, train_idx, optimizer, criterion):
    model.train()
    optimizer.zero_grad()
    _, out = model(data, edge_index)
    loss = criterion(out[train_idx], labels[train_idx])
    loss.backward()
    optimizer.step()
    return loss.item()

@torch.no_grad()
def evaluate(model, data, edge_index, labels, idx):
    model.eval()
    _, out = model(data, edge_index)
    pred = out[idx].argmax(dim=1)
    correct = (pred == labels[idx]).sum().item()
    acc = correct / sum(idx)
    return acc

In [25]:
edge_index, features, labels, train_idx, val_idx, test_idx = load_data("cora")
model = GCN(
    num_layers=3,
    input_dim=dataset.num_node_features,
    hidden_dim=64,
    output_dim=dataset.num_classes,
    dropout_ratio=0.8,
    activation=nn.functional.relu,
    norm_type="none"
)
# model = GCN1(dataset.num_node_features, 64, dataset.num_classes)

optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()

for epoch in range(1, 150):
    loss = train(model, features, edge_index, labels, train_idx, optimizer, criterion)
    val_acc = evaluate(model, features, edge_index, labels, val_idx)
    if epoch % 10 == 0 or epoch == 1:
        test_acc = evaluate(model, features, edge_index, labels, test_idx)
        print(f"Epoch {epoch:03d} | Loss: {loss:.4f} | Val Acc: {val_acc:.4f} | Test Acc: {test_acc:.4f}")


Epoch 001 | Loss: 1.9586 | Val Acc: 0.3040 | Test Acc: 0.3320
Epoch 010 | Loss: 0.7298 | Val Acc: 0.8260 | Test Acc: 0.8380
Epoch 020 | Loss: 0.1006 | Val Acc: 0.7920 | Test Acc: 0.8180
Epoch 030 | Loss: 0.1016 | Val Acc: 0.7940 | Test Acc: 0.8050
Epoch 040 | Loss: 0.0808 | Val Acc: 0.7700 | Test Acc: 0.8010
Epoch 050 | Loss: 0.1045 | Val Acc: 0.7840 | Test Acc: 0.8000
Epoch 060 | Loss: 0.0610 | Val Acc: 0.7840 | Test Acc: 0.7970
Epoch 070 | Loss: 0.0367 | Val Acc: 0.7700 | Test Acc: 0.7830
Epoch 080 | Loss: 0.0522 | Val Acc: 0.7880 | Test Acc: 0.8090
Epoch 090 | Loss: 0.0806 | Val Acc: 0.7540 | Test Acc: 0.7800
Epoch 100 | Loss: 0.0904 | Val Acc: 0.7620 | Test Acc: 0.7930
Epoch 110 | Loss: 0.0576 | Val Acc: 0.7600 | Test Acc: 0.8020
Epoch 120 | Loss: 0.0235 | Val Acc: 0.7720 | Test Acc: 0.8000
Epoch 130 | Loss: 0.0393 | Val Acc: 0.7700 | Test Acc: 0.7980
Epoch 140 | Loss: 0.0351 | Val Acc: 0.7640 | Test Acc: 0.7990


In [26]:
emb_t, z_soft = model.forward(data.x, data.edge_index)
print(emb_t.shape)
print(z_soft.shape)

torch.Size([2708, 64])
torch.Size([2708, 7])


In [34]:
z_soft[0]

tensor([-1.8515, -4.1910, -2.3432,  9.2844, -2.0349, -5.1608, -3.0414],
       grad_fn=<SelectBackward0>)

In [35]:
# save embeddings, softmax scores tensors above in a directory
def save_tensors(emb_t, z_soft, output_dir):
    torch.save(emb_t, f"{output_dir}/embeddings.pt")
    torch.save(z_soft, f"{output_dir}/label_scores.pt")
# Example usage
output_dir = "./teacher_outputs"
import os
if not os.path.exists(output_dir):
    os.makedirs(output_dir)
save_tensors(emb_t, z_soft, output_dir)

In [32]:
# load embeddings, z_soft
output_dir = "./teacher_outputs"
emb_t = torch.load(f"{output_dir}/embeddings.pt")
z_soft = torch.load(f"{output_dir}/softmax_scores.pt")

In [33]:
emb_t.shape, z_soft.shape

(torch.Size([2708, 64]), torch.Size([2708, 7]))