In [1]:
import pandas as pd
from collections import namedtuple
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import networkx as nx


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
label_index=302

In [3]:
# # 加载数据，并转换为torch.Tensor
# trainData = pd.read_pickle('../processed/corax_train.pkl')
# testData = pd.read_pickle('../processed/corax_test.pkl')
# validationData=pd.read_pickle('../processed/corax_validation.pkl')
# allData=pd.concat([trainData,testData,validationData],axis=0)

# # trainLabel = trainData[label_index]
# # trainData = trainData.drop(columns=label_index)

# # testLabel = testData[label_index]
# # testData = testData.drop(columns=label_index)

# # validationLabel = validationData[label_index]
# # validationData = validationData.drop(columns=label_index)

# allLabel= allData[label_index]
# allData = allData.drop(columns=label_index)

# G=nx.read_gpickle('../processed/corax_graph.gpickle')


In [4]:
feature = pd.read_pickle('../corpus/corax_features.pkl')
label = pd.read_pickle('../corpus/corax_labels.pkl')
adj_matrix=pd.read_pickle('../corpus/corax_adj.pkl')

label_onehot = torch.tensor(label)
y = torch.topk(label_onehot, 1)[1].squeeze(1).numpy()

G = nx.from_numpy_matrix(adj_matrix)

In [5]:
# allLabel.values

In [6]:
from torch_geometric.data import Data

# 确定x
x = torch.tensor(feature, dtype=torch.float)

# 确定edge_index
edge_index = torch.tensor(list(G.edges)).t().contiguous()

# 确定y
y = torch.tensor(y, dtype=torch.long)

train_mask = np.zeros(x.shape[0], dtype=bool)
val_mask = np.zeros(x.shape[0], dtype=bool)
test_mask = np.zeros(x.shape[0], dtype=bool)

train_mask[0:1180]=True
val_mask[1180:1680]=True
test_mask[1680:2681]=True

data=Data(x=x,edge_index=edge_index,y=y,train_mask=train_mask,val_mask=val_mask,test_mask=test_mask)
data

Data(x=[2680, 302], edge_index=[2, 5148], y=[2680], train_mask=[2680], val_mask=[2680], test_mask=[2680])

In [11]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv,GATConv,SAGEConv


# GCN：acc 0.7810
class GCN(torch.nn.Module):
    def __init__(self, num_node_features, num_classes):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(num_node_features, 16)
        self.conv2 = GCNConv(16, num_classes)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = F.softmax(x, dim=1)

        return x

In [12]:
# GAT：acc 0.8060

class GAT(torch.nn.Module):
    def __init__(self, num_node_features, num_classes):
        super(GAT, self).__init__()
        self.conv1 = GATConv(in_channels=num_node_features,
                                    out_channels=16,
                                    heads=2)
        self.conv2 = GATConv(in_channels=2*16,
                                    out_channels=num_classes,
                                    heads=1)
        
    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)
        
        return F.log_softmax(x, dim=1)

In [16]:
# 0.78左右
class GraphSAGE(nn.Module):
    def __init__(self, num_node_features, num_classes):
        super(GraphSAGE, self).__init__()
        self.conv1 = SAGEConv(num_node_features, 16)
        self.conv2 = SAGEConv(16, num_classes)
        
    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)
        
        return F.log_softmax(x, dim=1)


In [17]:
num_node_features=data.x.shape[1]
num_classes = 7
model=GraphSAGE(num_node_features,num_classes)

In [18]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=1e-4)
loss_function = torch.nn.CrossEntropyLoss()
model.train()
for epoch in range(200):
    out = model(data)
    optimizer.zero_grad()
    loss = loss_function(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()

    print('Epoch {:03d} loss {:.4f}'.format(epoch, loss.item()))
    

model.eval()
_, pred = model(data).max(dim=1)
correct = int(pred[data.test_mask].eq(data.y[data.test_mask]).sum().item())
acc = correct / int(data.test_mask.sum())
print('GCN Accuracy: {:.4f}'.format(acc))

Epoch 000 loss 1.9664
Epoch 001 loss 1.8723
Epoch 002 loss 1.8240
Epoch 003 loss 1.7750
Epoch 004 loss 1.7217
Epoch 005 loss 1.6760
Epoch 006 loss 1.6188
Epoch 007 loss 1.5724
Epoch 008 loss 1.5316
Epoch 009 loss 1.5162
Epoch 010 loss 1.4582
Epoch 011 loss 1.4319
Epoch 012 loss 1.4106
Epoch 013 loss 1.3676
Epoch 014 loss 1.3452
Epoch 015 loss 1.3111
Epoch 016 loss 1.2998
Epoch 017 loss 1.2745
Epoch 018 loss 1.2437
Epoch 019 loss 1.2606
Epoch 020 loss 1.2205
Epoch 021 loss 1.1848
Epoch 022 loss 1.2036
Epoch 023 loss 1.1333
Epoch 024 loss 1.1576
Epoch 025 loss 1.1256
Epoch 026 loss 1.1215
Epoch 027 loss 1.1130
Epoch 028 loss 1.0953
Epoch 029 loss 1.0684
Epoch 030 loss 1.0689
Epoch 031 loss 1.0576
Epoch 032 loss 1.0057
Epoch 033 loss 1.0175
Epoch 034 loss 0.9882
Epoch 035 loss 1.0010
Epoch 036 loss 0.9535
Epoch 037 loss 0.9673
Epoch 038 loss 0.9739
Epoch 039 loss 0.9429
Epoch 040 loss 0.9148
Epoch 041 loss 0.9406
Epoch 042 loss 0.8907
Epoch 043 loss 0.9036
Epoch 044 loss 0.8719
Epoch 045 