In [1]:
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Res_GCN

### Model

In [2]:
class GCN_RES(nn.Module):
    def __init__(self, dim_in, dim_h, dim_out, num_layers=5):
        super(GCN_RES, self).__init__()
        # 网络层数
        self.num_layers = num_layers

        self.convs = nn.ModuleList()
        self.norms = nn.ModuleList()

        # 输入层
        self.convs.append(GCNConv(dim_in, dim_h))
        self.norms.append(nn.LayerNorm(dim_h))
        
        # 中间层
        for _ in range(num_layers - 2):
            self.convs.append(GCNConv(dim_h, dim_h))
            self.norms.append(nn.LayerNorm(dim_h))
        
        # 输出层
        self.convs.append(GCNConv(dim_h, dim_out))
        self.norms.append(nn.LayerNorm(dim_out))

        self.apply(self.weights_init)

    def weights_init(self, m):
        if isinstance(m, nn.Linear):
            nn.init.xavier_uniform_(m.weight.data)
            if m.bias is not None:
                m.bias.data.fill_(0.0)

    def forward(self, x, edge_index, edge_weight=None):
        for i in range(self.num_layers):
            if i == 0:
                h = self.convs[i](x, edge_index, edge_weight)
                h = self.norms[i](h)
                res = F.relu(h) + x
                h = F.dropout(res, p=0.6, training=self.training)
                
            elif i != (self.num_layers - 1):
                h = self.convs[i](h, edge_index, edge_weight)
                h = self.norms[i](h)
                res = F.relu(h) + res
                h = F.dropout(res, p=0.6, training=self.training)
                
            else:
                h = self.convs[i](h, edge_index, edge_weight)
                h = self.norms[i](h)

        return h

### Train

In [3]:
def train(model, data, optimizer, criterion, count, epochs=200):
    train_loss = []
    train_acc = []
    train_f1 = []
    val_loss = []
    val_accs = []
    val_accs1 = []
    model.train()
    for epoch in range(epochs+1):
        data = data.to(device)
        optimizer.zero_grad()
        _ = model(data.x, data.edge_index, data.edge_attr)
        out = F.log_softmax(_, dim=1)

        loss = criterion(out[data.train_mask], data.y[data.train_mask])
        train_loss.append(loss.item())

        loss.backward()
        optimizer.step()

        preds = out.argmax(dim=1)[data.train_mask].cpu()
        acc = accuracy_score(data.y[data.train_mask].cpu(), preds)
        train_acc.append(acc)
        f1 = f1_score(data.y[data.train_mask].cpu(), preds, average='macro')
        train_f1.append(f1)

        model.eval()
        with torch.no_grad():
            val_loss_ = criterion(out[data.val_mask], data.y[data.val_mask])
            val_loss.append(val_loss_.item())
            
            _ = model(data.x, data.edge_index, data.edge_attr)
            out = F.log_softmax(_, dim=1)
            
            val_acc = accuracy_score(data.y[data.val_mask].cpu(), out.argmax(dim=1)[data.val_mask].cpu())
            val_accs.append(val_acc)
            val_accs1.append(val_acc)

        if epoch % 10 == 0:
            print(f'Epoch [{epoch:03d}/{epochs}], Train_Loss: {loss.item():0.3f}, Val_Loss: {val_loss[-1]:0.3f}, Accuracy: {acc:.3f}, Macro-F1: {f1:.3f}, Val_acc: {max(val_accs):.3f}')
            val_accs = []
            
    return pd.DataFrame(data={f'train_loss_{count}':train_loss,f'val_loss_{count}':val_loss,f'train_acc_{count}':train_acc,f'val_acc_{count}':val_accs1})
    # return pd.DataFrame(data={num_layer: val_accs1})

### Experience

In [5]:
# dataset = torch.load('data/final_data/dataset_torch/shanghai_2019.pth')
data_name = ['shanghai_2019','shanghai_2018','suzhou_2018']

In [6]:
for data in data_name:
    dataset = torch.load(f'data/final_data/dataset_torch/{data}.pth')
    print('dataset: ',data)
    for num_layer in range(1,11):
        print(f'num layer = {num_layer}')
        df = pd.DataFrame()
        for _ in range(10):
            model = GCN_RES(dataset.num_node_features, dataset.num_node_features, dataset.num_classes, num_layer).to(device)
            criterion = torch.nn.CrossEntropyLoss()
            optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=5e-4)
            train_log = train(model, dataset, optimizer, criterion, _, epochs=200)
            df = pd.concat([df,train_log], axis=1)
            print('\n')
        df.to_csv(f'data/final_data/logging/{data}_layer{num_layer}_res.csv', index=False)

dataset:  shanghai_2019
num layer = 1
Epoch [000/200], Train_Loss: 8.132, Val_Loss: 8.132, Accuracy: 0.000, Macro-F1: 0.000, Val_acc: 0.000
Epoch [010/200], Train_Loss: 2.826, Val_Loss: 2.914, Accuracy: 0.361, Macro-F1: 0.209, Val_acc: 0.242
Epoch [020/200], Train_Loss: 1.689, Val_Loss: 1.682, Accuracy: 0.492, Macro-F1: 0.446, Val_acc: 0.557
Epoch [030/200], Train_Loss: 1.320, Val_Loss: 1.584, Accuracy: 0.551, Macro-F1: 0.548, Val_acc: 0.412
Epoch [040/200], Train_Loss: 1.176, Val_Loss: 1.271, Accuracy: 0.643, Macro-F1: 0.649, Val_acc: 0.656
Epoch [050/200], Train_Loss: 1.117, Val_Loss: 1.323, Accuracy: 0.675, Macro-F1: 0.677, Val_acc: 0.657
Epoch [060/200], Train_Loss: 1.079, Val_Loss: 1.219, Accuracy: 0.679, Macro-F1: 0.682, Val_acc: 0.669
Epoch [070/200], Train_Loss: 1.046, Val_Loss: 1.223, Accuracy: 0.694, Macro-F1: 0.697, Val_acc: 0.666
Epoch [080/200], Train_Loss: 1.019, Val_Loss: 1.187, Accuracy: 0.702, Macro-F1: 0.705, Val_acc: 0.671
Epoch [090/200], Train_Loss: 0.995, Val_Loss

## GCN

### Model

In [6]:
class GCN(nn.Module):
    def __init__(self, dim_in, dim_h, dim_out, num_layers=5):
        super(GCN, self).__init__()
        # 网络层数
        self.num_layers = num_layers

        self.convs = nn.ModuleList()

        # 输入层
        self.convs.append(GCNConv(dim_in, dim_h))
        
        # 中间层
        for _ in range(num_layers - 2):
            self.convs.append(GCNConv(dim_h, dim_h))
        
        # 输出层
        self.convs.append(GCNConv(dim_h, dim_out))

        self.apply(self.weights_init)

    def weights_init(self, m):
        if isinstance(m, nn.Linear):
            nn.init.xavier_uniform_(m.weight.data)
            if m.bias is not None:
                m.bias.data.fill_(0.0)

    def forward(self, x, edge_index, edge_weight=None):
        for i in range(self.num_layers):
            if i == 0:
                h = self.convs[i](x, edge_index, edge_weight)
                h = F.dropout(h, p=0.6, training=self.training)
                
            elif i != (self.num_layers - 1):
                h = self.convs[i](h, edge_index, edge_weight)
                h = F.dropout(h, p=0.6, training=self.training)
                
            else:
                h = self.convs[i](h, edge_index, edge_weight)

        return h

### Experience

In [9]:
# dataset = torch.load('data/final_data/dataset_torch/shanghai_2019.pth')

In [8]:
for data in data_name:
    dataset = torch.load(f'data/final_data/dataset_torch/{data}.pth')
    print('dataset: ',data)
    if data != 'shanghai_2019':
        for num_layer in range(1,11):
            print(f'num layer = {num_layer}')
            df = pd.DataFrame()
            for _ in range(10):
                model = GCN(dataset.num_node_features, dataset.num_node_features, dataset.num_classes, num_layer).to(device)
                criterion = torch.nn.CrossEntropyLoss()
                optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=5e-4)
                train_log = train(model, dataset, optimizer, criterion, _, epochs=200)
                df = pd.concat([df,train_log], axis=1)
                print('\n')
            df.to_csv(f'data/final_data/logging/{data}_layer{num_layer}.csv', index=False) 
    else:
        for num_layer in range(5,11):
            print(f'num layer = {num_layer}')
            df = pd.DataFrame()
            for _ in range(10):
                model = GCN(dataset.num_node_features, dataset.num_node_features, dataset.num_classes, num_layer).to(device)
                criterion = torch.nn.CrossEntropyLoss()
                optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=5e-4)
                train_log = train(model, dataset, optimizer, criterion, _, epochs=200)
                df = pd.concat([df,train_log], axis=1)
                print('\n')
            df.to_csv(f'data/final_data/logging/{data}_layer{num_layer}.csv', index=False)                               

dataset:  shanghai_2019
num layer = 5
Epoch [000/200], Train_Loss: 2.200, Val_Loss: 2.205, Accuracy: 0.113, Macro-F1: 0.110, Val_acc: 0.393
Epoch [010/200], Train_Loss: 1.343, Val_Loss: 1.492, Accuracy: 0.609, Macro-F1: 0.606, Val_acc: 0.570
Epoch [020/200], Train_Loss: 1.134, Val_Loss: 1.349, Accuracy: 0.670, Macro-F1: 0.667, Val_acc: 0.607
Epoch [030/200], Train_Loss: 1.050, Val_Loss: 1.261, Accuracy: 0.689, Macro-F1: 0.695, Val_acc: 0.653
Epoch [040/200], Train_Loss: 0.979, Val_Loss: 1.224, Accuracy: 0.706, Macro-F1: 0.710, Val_acc: 0.666
Epoch [050/200], Train_Loss: 0.907, Val_Loss: 1.191, Accuracy: 0.729, Macro-F1: 0.730, Val_acc: 0.679
Epoch [060/200], Train_Loss: 0.851, Val_Loss: 1.144, Accuracy: 0.732, Macro-F1: 0.736, Val_acc: 0.688
Epoch [070/200], Train_Loss: 0.796, Val_Loss: 1.203, Accuracy: 0.745, Macro-F1: 0.750, Val_acc: 0.695
Epoch [080/200], Train_Loss: 0.764, Val_Loss: 1.108, Accuracy: 0.759, Macro-F1: 0.760, Val_acc: 0.710
Epoch [090/200], Train_Loss: 0.715, Val_Loss