<a href="https://colab.research.google.com/github/LaZzyMan/Notebook/blob/master/gnn_pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
from torch.nn import Module, Dropout, ReLU, Linear, Softmax
import torch.nn.functional as F

from sklearn import model_selection
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import f1_score, accuracy_score, classification_report, confusion_matrix
import pandas as pd
import numpy as np
import pickle
from tqdm import tqdm

from read_graph import get_sg_graph
from gnn_conv import GCNConv

ModuleNotFoundError: ignored

In [None]:
class MultiGCN(Module):
    """Two-layer graph convolution network that fuses several graphs.

    Every graph gets its own pair of ``GCNConv`` layers. The first-layer
    outputs of all graphs are concatenated along the feature axis, fed to
    every second-layer convolution, and the second-layer outputs are
    averaged. A final linear layer followed by a softmax produces class
    probabilities for two selections of nodes.

    The convolution layers are registered as ``conv_1_<i>`` / ``conv_2_<i>``
    so ``state_dict`` keys stay the same as in earlier checkpoints.

    Args:
        feature_dim: Number of input features per node.
        dropout: Dropout probability applied before each convolution stage.
        num_graph: Number of graphs (one pair of convolutions per graph).
        hidden_dim: Output width of every convolution (default 64).
        num_classes: Number of output classes (default 6).

    Note:
        ``forward`` returns softmax probabilities, not logits. A loss that
        applies its own (log-)softmax, such as ``F.cross_entropy``, should
        not be fed these outputs directly; use a negative log-likelihood on
        their logarithm instead.
    """

    def __init__(self, feature_dim=0, dropout=0.4, num_graph=3, hidden_dim=64, num_classes=6):
        super(MultiGCN, self).__init__()
        self.feature_dim = feature_dim
        self.act = ReLU()
        self.dropout = Dropout(p=dropout)
        self.out = Linear(hidden_dim, num_classes)
        self.num_graph = num_graph
        # Plain Python lists give ordered access for zip() in forward(); the
        # layers themselves are registered through setattr below.
        self.conv_1 = []
        self.conv_2 = []
        for i in range(num_graph):
            first = GCNConv(in_channels=self.feature_dim, out_channels=hidden_dim, improved=False)
            # The second layer consumes the concatenation of all first-layer
            # outputs, hence hidden_dim * num_graph input channels.
            second = GCNConv(in_channels=hidden_dim * num_graph, out_channels=hidden_dim, improved=False)
            setattr(self, 'conv_1_{}'.format(i), first)
            setattr(self, 'conv_2_{}'.format(i), second)
            self.conv_1.append(first)
            self.conv_2.append(second)
        self.pred = Softmax(dim=-1)

    def forward(self, feature=None, adj_indices=None, adj_values=None, out_indices=None, val_indices=None):
        """Run the network and return class probabilities for two node sets.

        Args:
            feature: Node feature tensor shared by all graphs.
            adj_indices: One index tensor per graph, passed as the second
                argument of each convolution.
            adj_values: One value tensor per graph, passed as the third
                argument of each convolution.
            out_indices: Row indices (dim 0) of the nodes for the first output.
            val_indices: Row indices (dim 0) of the nodes for the second output.

        Returns:
            Tuple ``(train_probs, val_probs)`` of softmax outputs for the rows
            selected by ``out_indices`` and ``val_indices`` respectively.
        """
        x_out = self.dropout(feature)
        x_outs = [self.act(conv(x_out, adj_indice, adj_value))
                  for adj_indice, adj_value, conv in zip(adj_indices, adj_values, self.conv_1)]
        x_out = torch.cat(x_outs, 1)
        x_out = self.dropout(x_out)
        x_outs = [self.act(conv(x_out.float(), adj_indice, adj_value))
                  for adj_indice, adj_value, conv in zip(adj_indices, adj_values, self.conv_2)]
        # Average over however many graphs were actually processed.
        x_out = self.act(sum(x_outs) / len(x_outs))
        train_out = torch.index_select(x_out, 0, out_indices)
        val_out = torch.index_select(x_out, 0, val_indices)
        train_out = self.out(train_out.float())
        val_out = self.out(val_out.float())
        return self.pred(train_out), self.pred(val_out)

In [None]:
# Hyper-parameters
lr = .1
l2_reg = 0.
epochs = 1000
patience = 200
dropout = .4

# Load the node -> label mapping.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('data/label.pickle', 'rb') as label_file:
    labels = pickle.load(label_file)
# Nodes removed from the label set before any splitting.
exclude_node = ['32715', '32955', '37779', '37812', '37831', '38504', '39172', '39675', '39981', '39043']
for node in exclude_node:
    labels.pop(node)
num_nodes = len(labels)
X = list(labels.keys())
Y = list(labels.values())
labels = pd.Series(Y, index=X)
graphs = get_sg_graph(K=50, feature=['land_cover', 'poi', 'building'], d=1.)

# Split sizes, expressed as fractions of the total number of labelled nodes.
# (`tr_farc` keeps its original spelling because other cells may read it.)
tr_farc = .2
val_frac = .4
# Stratified split into a training set and the remainder.
train_set, test_set = model_selection.train_test_split(
    labels, train_size=int(num_nodes * tr_farc), test_size=None, stratify=labels
)
# Stratified split of the remainder into validation and test sets.
val_set, test_set = model_selection.train_test_split(
    test_set, train_size=int(num_nodes * val_frac), test_size=None, stratify=test_set
)
# Encode labels as integer class ids. The encoder is fitted once on the full
# label set so that every split uses the same label -> id mapping; re-fitting
# per split would shift the ids whenever a class is absent from that split.
bin_encoding = LabelEncoder()
bin_encoding.fit(labels)
train_Y = torch.tensor(bin_encoding.transform(train_set), dtype=torch.long)
val_Y = torch.tensor(bin_encoding.transform(val_set), dtype=torch.long)
test_Y = torch.tensor(bin_encoding.transform(test_set), dtype=torch.long)

# Node features are taken from the first graph only.
features = torch.tensor(graphs[0].node_features(), dtype=torch.float32)
# One COO adjacency matrix per graph, weighted by the 'traffic' attribute.
adjs = [graph.to_adjacency_matrix(weighted='traffic').tocoo() for graph in graphs]
# Stack row and column indices into a (2, nnz) int64 tensor per graph.
adj_indices = [torch.tensor(np.vstack((A.row, A.col)).astype("int64"), dtype=torch.long) for A in adjs]
# NOTE(review): dtype follows A.data; if that is float64 the edge weights will
# be double while `features` is float32 -- confirm GCNConv accepts this.
adj_values = [torch.tensor(A.data) for A in adjs]
# Positional indices of the training / validation nodes in the first graph.
out_indices = torch.tensor(graphs[0].node_ids_to_ilocs(np.asarray(train_set.index)).astype("int64"), dtype=torch.long)
val_indices = torch.tensor(graphs[0].node_ids_to_ilocs(np.asarray(val_set.index)).astype("int64"), dtype=torch.long)

In [None]:
# Build the model and move it, together with every tensor it consumes during
# training, onto the selected device.
device = torch.device('cpu')
model = MultiGCN(feature_dim=features.shape[1], dropout=dropout, num_graph=len(graphs))
model = model.to(device)

# Labels for the training and validation nodes.
train_Y, val_Y = train_Y.to(device), val_Y.to(device)

# Node features and the node-selection indices.
features = features.to(device)
out_indices, val_indices = out_indices.to(device), val_indices.to(device)

# Per-graph adjacency indices and values.
adj_indices = [index_tensor.to(device) for index_tensor in adj_indices]
adj_values = [value_tensor.to(device) for value_tensor in adj_values]

In [None]:
# Two Adam parameter groups: parameters whose name ends in 'bias' or contains
# "bn" are exempt from weight decay; everything else uses the optimizer-level
# `weight_decay`. Both selections are lazy generators that the optimizer
# consumes when it is constructed.
weight_decay_list = (
    p for n, p in model.named_parameters()
    if not (n.endswith('bias') or "bn" in n)
)
no_decay_list = (
    p for n, p in model.named_parameters()
    if n.endswith('bias') or "bn" in n
)
parameters = [
    {'params': weight_decay_list},
    {'params': no_decay_list, 'weight_decay': 0.},
]
optimizer = torch.optim.Adam(parameters, lr=lr, weight_decay=l2_reg)

In [None]:
# Full-batch training with early stopping on validation micro-F1.
#
# The model returns softmax probabilities, so the loss is the negative
# log-likelihood of their logarithm. F.cross_entropy would apply a second
# softmax on top of the probabilities and flatten the gradients.
_PROB_FLOOR = 1e-12  # keeps log() finite when a probability underflows to 0

# Start below any reachable score so the first epoch always writes a
# checkpoint; otherwise the final load could fail or pick up a stale file.
best_score = float('-inf')
es = 0  # epochs since the last validation improvement
for epoch in range(1, epochs + 1):
    # --- optimisation step (dropout active) ---
    model.train()
    optimizer.zero_grad()
    train_pred_Y, _ = model(features, adj_indices, adj_values, out_indices, val_indices)
    loss = F.nll_loss(torch.log(train_pred_Y.clamp_min(_PROB_FLOOR)), train_Y)
    loss.backward()
    optimizer.step()

    # --- validation (dropout disabled, no graph tracking) ---
    # Early stopping and checkpoint selection depend on this score, so it is
    # measured deterministically on the weights that would be saved.
    model.eval()
    with torch.no_grad():
        _, val_pred_Y = model(features, adj_indices, adj_values, out_indices, val_indices)
        val_loss = F.nll_loss(torch.log(val_pred_Y.clamp_min(_PROB_FLOOR)), val_Y)

    # f1_score expects (y_true, y_pred) as array-likes; convert explicitly so
    # this also works when the tensors live on a non-CPU device.
    train_f1 = f1_score(train_Y.cpu().numpy(),
                        train_pred_Y.max(dim=-1).indices.cpu().numpy(), average='micro')
    val_f1 = f1_score(val_Y.cpu().numpy(),
                      val_pred_Y.max(dim=-1).indices.cpu().numpy(), average='micro')
    print(f'Epoch {epoch}/{epochs}')
    print(f'loss: {loss} - F1-micro: {train_f1} - val_loss: {val_loss} - val_F1: {val_f1}')

    if val_f1 > best_score:
        best_score = val_f1
        es = 0
        torch.save(model.state_dict(), 'checkpoint.pt')
    else:
        es += 1
    if es > patience:
        print('Early Stopped.')
        break
# Restore the weights with the best validation score.
model.load_state_dict(torch.load('checkpoint.pt'))