## Spatial Adaptive Graph Neural Network for POI Graph Learning 
In this tutorial, we will go through how to run the Spatial Adaptive Graph Neural Network (SA-GNN) to learn on the POI graph. If you are interested in more details, please refer to the paper "Competitive analysis for points of interest".


In [1]:
import os
import pgl
import pickle
import pandas as pd
import numpy as np
from random import shuffle
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from sagnn import SpatialOrientedAGG, SpatialAttnProp
paddle.set_device('cpu')

CPUPlace

### Load dataset and construct the POI graph

In [2]:
def load_dataset(file_path, dataset):
    """
    Read a POI dataset from disk and wrap it in a pgl.Graph.

    Files are looked up as ``<file_path>/<dataset>.<ext>``:
      * ``.edge``  - space-separated, no header; columns 0 and 1 form the edge list.
      * ``.coord`` - space-separated, no header; one row per node.
      * ``.feat``  - optional pickle file with the node features. When it is
                     missing, an identity matrix (one-hot feature per node) is used.
      * ``.label`` - space-separated, no header; columns 0 and 1 are the two
                     index columns, the remaining columns are the labels.

    Returns:
        (graph, (inds_1, inds_2), labels)
    """
    def _read_table(extension):
        # All text inputs share the same headerless, space-separated layout.
        table_path = os.path.join(file_path, '%s.%s' % (dataset, extension))
        return pd.read_table(table_path, header=None, sep=' ')

    edge_table = _read_table('edge')
    edge_list = list(zip(edge_table[0], edge_table[1]))

    coords = _read_table('coord').to_numpy()
    num_nodes = len(coords)

    feat_path = os.path.join(file_path, '%s.feat' % dataset)
    if not os.path.exists(feat_path):
        # No feature file: fall back to one-hot node features.
        features = np.eye(num_nodes)
    else:
        # NOTE: unpickling can execute arbitrary code; only load feature
        # files that come from a trusted source.
        with open(feat_path, 'rb') as feat_file:
            features = pickle.load(feat_file)

    graph = pgl.Graph(
        edge_list,
        num_nodes=num_nodes,
        node_feat={"feat": features, 'coord': coords},
    )

    label_table = np.array(_read_table('label'))
    first_inds = label_table[:, 0]
    second_inds = label_table[:, 1]
    pair_labels = label_table[:, 2:]

    return graph, (first_inds, second_inds), pair_labels

### Build the SA-GNN model for link prediction

In [3]:
class DenseLayer(nn.Layer):
    """Fully connected layer followed by an optional activation.

    Args:
        in_dim: size of the input features passed to ``nn.Linear``.
        out_dim: size of the output features passed to ``nn.Linear``.
        activation: callable applied to the linear output. ``None`` means
            the linear output is returned unchanged.
        bias: when falsy, the linear layer is created without a bias term.
    """

    def __init__(self, in_dim, out_dim, activation=F.relu, bias=True):
        super(DenseLayer, self).__init__()
        self.activation = activation
        # bias_attr=False disables the bias; None keeps nn.Linear's default.
        self.fc = nn.Linear(in_dim, out_dim, bias_attr=None if bias else False)

    def forward(self, input_feat):
        """Apply the linear layer, then the activation if one was given."""
        output = self.fc(input_feat)
        if self.activation is None:
            return output
        return self.activation(output)

class SAGNNModel(nn.Layer):
    """SA-GNN encoder with an MLP head that scores pairs of nodes.

    The model stacks ``num_convs`` pairs of (SpatialOrientedAGG,
    SpatialAttnProp) layers, runs the node representations through a stack of
    DenseLayer blocks, and scores each requested node pair with a linear
    layer followed by a sigmoid.

    Args:
        infeat_dim: input dimension given to the first aggregation layer.
        hidden_dim: hidden dimension given to the aggregation/propagation layers.
        dense_dims: output sizes of the successive DenseLayer blocks.
        num_heads: forwarded to SpatialAttnProp; the input size of the next
            layer is taken to be ``num_heads * hidden_dim``.
        feat_drop: forwarded to SpatialAttnProp.
        num_sectors: forwarded to SpatialOrientedAGG.
        max_dist: forwarded to SpatialAttnProp.
        grid_len: forwarded to SpatialAttnProp.
        num_convs: number of aggregation/propagation layer pairs.
    """

    # dense_dims defaults to a tuple so no mutable object is shared between calls.
    def __init__(self, infeat_dim, hidden_dim=128, dense_dims=(128, 128), num_heads=4, feat_drop=0.2, num_sectors=4, max_dist=0.1, grid_len=0.001, num_convs=1):
        super(SAGNNModel, self).__init__()
        self.num_convs = num_convs
        self.agg_layers = nn.LayerList()
        self.prop_layers = nn.LayerList()
        in_dim = infeat_dim
        for _ in range(num_convs):
            agg = SpatialOrientedAGG(in_dim, hidden_dim, num_sectors, transform=False, activation=None)
            prop = SpatialAttnProp(hidden_dim, hidden_dim, num_heads, feat_drop, max_dist, grid_len, activation=None)
            self.agg_layers.append(agg)
            self.prop_layers.append(prop)
            in_dim = num_heads * hidden_dim

        self.mlp = nn.LayerList()
        # A distinct loop name avoids shadowing the hidden_dim parameter.
        for dense_dim in dense_dims:
            self.mlp.append(DenseLayer(in_dim, dense_dim, activation=F.relu))
            in_dim = dense_dim
        # The head consumes the concatenated representations of both nodes in a pair.
        self.output_layer = nn.Linear(2 * in_dim, 1)

    def forward(self, graph, inds):
        """Score node pairs.

        Args:
            graph: graph whose ``node_feat['feat']`` holds the input features.
            inds: pair of index collections; ``inds[0][k]`` and ``inds[1][k]``
                select the two nodes of the k-th pair.

        Returns:
            Sigmoid output of the final linear layer, one score per pair.
        """
        feat_h = graph.node_feat['feat']
        for agg, prop in zip(self.agg_layers, self.prop_layers):
            feat_h = agg(graph, feat_h)
            # The propagation layer is given the tensor form of the graph.
            graph = graph.tensor()
            feat_h = prop(graph, feat_h)

        for fc in self.mlp:
            feat_h = fc(feat_h)
        pair_feat = paddle.concat(
            [paddle.gather(feat_h, inds[0]), paddle.gather(feat_h, inds[1])],
            axis=-1)
        return F.sigmoid(self.output_layer(pair_feat))

Here we load a mock dataset for demonstration; you can load the full dataset in the same way.

Note that all needed files should include:
1) one edge file (dataset.edge) for POI graph construction;
2) one coordinate file (dataset.coord) for POI position;
3) one label file (dataset.label) for training model;
4) one feature file (dataset.feat) for POI feature loading, which is optional. If there is no dataset.feat, the default POI feature is the one-hot vector.

In [4]:
# Load the mock POI graph together with the labelled index pairs.
graph, inds, labels = load_dataset('./data/', 'mock_poi')

# Randomly split the labelled pairs: 60% for training, the rest for testing.
ids = list(range(len(labels)))
shuffle(ids)
train_num = int(0.6*len(labels))
train_ids, test_ids = ids[:train_num], ids[train_num:]
train_inds = (inds[0][train_ids], inds[1][train_ids])
test_inds = (inds[0][test_ids], inds[1][test_ids])
train_labels = labels[train_ids]
test_labels = labels[test_ids]
print("dataset num: %s" % (len(labels)), "training num: %s" % (len(train_labels)))

# The model input size is the feature width (last axis), not the number of
# nodes (first axis). The two only coincide for the square one-hot fallback
# feature matrix.
infeat_dim = graph.node_feat['feat'].shape[-1]

model = SAGNNModel(infeat_dim)
optim = paddle.optimizer.Adam(0.001, parameters=model.parameters())

dataset num: 300 training num: 180


### Start training

In [6]:
def train(model, graph, inds, labels, optim):
    """Run one optimisation step over all given pairs and return the loss.

    Args:
        model: network called as ``model(graph, inds)`` to produce predictions.
        graph: graph object; converted with ``graph.tensor()`` before use.
        inds: index pairs, converted to an int64 tensor.
        labels: target values, converted to a float32 tensor.
        optim: optimizer used to apply the update and clear the gradients.

    Returns:
        The scalar binary cross-entropy loss of this step.
    """
    model.train()
    graph = graph.tensor()
    inds = paddle.to_tensor(inds, 'int64')
    labels = paddle.to_tensor(labels, 'float32')
    preds = model(graph, inds)
    bce_loss = paddle.nn.BCELoss()
    loss = bce_loss(preds, labels)
    loss.backward()
    optim.step()
    optim.clear_grad()
    # The reduced loss may be a 0-D or a one-element tensor depending on the
    # Paddle version; flatten first so indexing works in both cases.
    return loss.numpy().reshape(-1)[0]

def evaluate(model, graph, inds, labels):
    """Compute the loss and accuracy of the model on the given pairs.

    Args:
        model: network called as ``model(graph, inds)`` to produce predictions.
        graph: graph object; converted with ``graph.tensor()`` before use.
        inds: index pairs, converted to an int64 tensor.
        labels: target values, converted to a float32 tensor.

    Returns:
        (loss, accuracy): the scalar binary cross-entropy loss, and the
        fraction of correct predictions per label column.
    """
    model.eval()
    graph = graph.tensor()
    inds = paddle.to_tensor(inds, 'int64')
    labels = paddle.to_tensor(labels, 'float32')
    # No gradients are needed for evaluation.
    with paddle.no_grad():
        preds = model(graph, inds)
        bce_loss = paddle.nn.BCELoss()
        loss = bce_loss(preds, labels)

    # Predictions are sigmoid probabilities: threshold them at 0.5. Casting
    # them straight to int would truncate almost every value to 0.
    pred_classes = (preds.numpy() > 0.5).astype(int)
    true_classes = labels.numpy().astype(int)
    accuracy = 1.0*np.sum(pred_classes == true_classes, axis=0) / len(labels)

    # The reduced loss may be a 0-D or a one-element tensor depending on the
    # Paddle version; flatten first so indexing works in both cases.
    return loss.numpy().reshape(-1)[0], accuracy

# Train for five epochs; each call to train() performs a single update using
# all training pairs at once.
for epoch_id in range(5):
    train_loss = train(model, graph, train_inds, train_labels, optim)
    print("epoch:%d train/loss:%s" % (epoch_id, train_loss))

# Evaluate once on the held-out pairs after training has finished.
test_loss, test_acc = evaluate(model, graph, test_inds, test_labels)
print("test loss: %s, test accuracy: %s" % (test_loss, test_acc))


epoch:0 train/loss:0.5990816
epoch:1 train/loss:0.5992603
epoch:2 train/loss:0.5916268
epoch:3 train/loss:0.58739626
epoch:4 train/loss:0.56397086
test loss: 0.63546157, test accuracy: [0.65]
