In [1]:
from collections import defaultdict
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
import torch.optim as optim

In [2]:
import random_graph
node_df, edge_df = random_graph.random_graph_gcn(1000,3000,nums_features=120,)

In [31]:
def data_format_process(node_df,edge_df):
    """
    Convert a node table and an edge table into model-ready arrays.

    Nodes that are not an endpoint of any edge are dropped; the remaining
    nodes keep their original row order and are re-indexed 0..n-1. All
    returned arrays and the adjacency dict use these new indices.

    input
    node_df: one row per node, with columns `cust_id`, `is_driver`,
             `is_reported`; every other column is treated as a feature
    edge_df: one row per edge, with columns `cust_id` (source) and
             `opp_id` (target)

    output
    x: node features (array), feature columns in node_df column order
    y: label         (array: is_reported * 1)
    adjacency_dict: a dict store neighbor node's info  {node_index:[neighbor1,neighbor2..]}
                    only nodes that appear in edge_df['cust_id'] become keys
    train_mask: mask for providing training dataset (array: bool), True where is_driver is truthy
    test_mask: mask for providing testing dataset (array: bool), the complement of train_mask

    raises
    KeyError: if an edge references an id that is not present in node_df
    """
    reserved_cols = {'cust_id', 'is_driver', 'is_reported'}

    # delete no-edge-node: keep only nodes that are an endpoint of some edge.
    # Filtering by label (isin) instead of by position keeps this correct
    # when cust_id values are not simply 0..n-1.
    linked_ids = set(edge_df['cust_id']) | set(edge_df['opp_id'])
    node_df = node_df[node_df['cust_id'].isin(linked_ids)].reset_index(drop=True)

    # node_lookup: cust_id -> row position in the filtered table.
    # A plain dict avoids one DataFrame.loc lookup per edge endpoint.
    node_lookup = {cust_id: pos for pos, cust_id in enumerate(node_df['cust_id'].tolist())}

    # build neighbor dictionary
    adjacency_dict = defaultdict(list)
    for cust, opp in zip(edge_df['cust_id'].tolist(), edge_df['opp_id'].tolist()):
        adjacency_dict[node_lookup[cust]].append(node_lookup[opp])

    # convert to Array; a list (not a set) keeps the feature order
    # deterministic and is accepted by every pandas version.
    feature_cols = [col for col in node_df.columns if col not in reserved_cols]
    x = node_df[feature_cols].to_numpy()
    y = node_df['is_reported'].to_numpy() * 1

    # mask conf: cast to bool first so that `~` is a logical NOT even when
    # is_driver is stored as 0/1 integers.
    train_mask = node_df['is_driver'].to_numpy().astype(bool)
    test_mask = ~train_mask

    return x, y, adjacency_dict, train_mask, test_mask

In [32]:
x, y, adjacency_dict, train_mask, test_mask  = data_format_process(node_df,edge_df)

In [33]:
import itertools
import scipy.sparse as sp

In [34]:
def build_adjacency(adj_dict, num_nodes=None):
    """Build a symmetric 0/1 adjacency matrix from an adjacency list.

    Every (src, dst) pair in `adj_dict` is inserted in both directions and
    duplicate edges are collapsed, so each stored entry is exactly 1.

    Parameters
    ----------
    adj_dict : dict
        Maps a node index to an iterable of neighbour node indices.
    num_nodes : int, optional
        Side length of the square matrix. When omitted it is the larger of
        `len(adj_dict)` and (largest node index seen + 1), so nodes that
        only ever appear as a neighbour still fit in the matrix.

    Returns
    -------
    scipy.sparse.coo_matrix
        float32 matrix of shape (num_nodes, num_nodes).
    """
    edges = set()
    node_ids = set(adj_dict)
    for src, neighbors in adj_dict.items():
        for dst in neighbors:
            # store both directions; the set drops repeated edges
            edges.add((src, dst))
            edges.add((dst, src))
            node_ids.add(dst)

    if num_nodes is None:
        highest = max(node_ids) if node_ids else -1
        num_nodes = max(len(adj_dict), int(highest) + 1)

    # No edges at all: return an empty matrix instead of failing on the
    # 2-D slicing of an empty index array below.
    if not edges:
        return sp.coo_matrix((num_nodes, num_nodes), dtype="float32")

    # sorted() keeps the entries in row-major order
    edge_index = np.asarray(sorted(edges))
    adjacency = sp.coo_matrix((np.ones(len(edge_index)),
                               (edge_index[:, 0], edge_index[:, 1])),
                              shape=(num_nodes, num_nodes), dtype="float32")
    return adjacency

In [35]:
adjacency = build_adjacency(adjacency_dict)

In [36]:
adjacency.shape

(998, 998)

In [37]:

def normalization(adjacency):
    """Compute the symmetrically normalised matrix L = D^-0.5 * (A + I) * D^-0.5.

    `A` is the given sparse adjacency matrix, `I` adds a self-connection to
    every node and `D` is the diagonal matrix of row sums of `A + I`.
    The result is returned in COO format.
    """
    size = adjacency.shape[0]
    with_self_loops = adjacency + sp.eye(size)    # add self-connections

    # row sums of (A + I), flattened to a 1-D vector
    row_sums = np.asarray(with_self_loops.sum(axis=1)).ravel()
    inv_sqrt_degree = sp.diags(row_sums ** -0.5)

    scaled = inv_sqrt_degree @ with_self_loops @ inv_sqrt_degree
    return scaled.tocoo()

In [38]:
adjacency = normalization(adjacency)

In [39]:
# Convert the normalised scipy COO matrix into a torch sparse tensor.
num_nodes, input_dim = x.shape
# Row 0 holds the row index and row 1 the column index of each stored entry.
indices = torch.from_numpy(
    np.vstack((adjacency.row, adjacency.col)).astype(np.int64))
values = torch.from_numpy(adjacency.data.astype(np.float32))
# torch.sparse_coo_tensor replaces the deprecated torch.sparse.FloatTensor
# constructor; the float32 `values` keep the tensor dtype at float32.
tensor_adjacency = torch.sparse_coo_tensor(indices, values,
                                           (num_nodes, num_nodes))

In [40]:
tensor_adjacency

tensor(indices=tensor([[  0,   0,   0,  ..., 997, 997, 997],
                       [  0,  72, 260,  ..., 644, 743, 997]]),
       values=tensor([0.1250, 0.1250, 0.1021,  ..., 0.1667, 0.1768, 0.2500]),
       size=(998, 998), nnz=6968, layout=torch.sparse_coo)

In [41]:
# Random (input_dim x 32) weight matrix for the demo projection below.
# The row count comes from `input_dim` (taken from x.shape in an earlier cell)
# instead of a hard-coded 120, so torch.mm(x, weight) stays shape-compatible
# when the number of features changes.
weight = torch.Tensor(np.random.rand(input_dim,32))

In [42]:
x = torch.Tensor(x)

In [43]:
support = torch.mm(x,weight)

In [44]:
support.shape

torch.Size([998, 32])

In [45]:
torch.sparse.mm(tensor_adjacency, support)

tensor([[-2.1788,  0.0176,  0.0294,  ..., -0.2162, -1.3712, -0.2204],
        [ 3.2295,  2.4297,  5.7494,  ...,  5.0903,  6.1482,  4.7603],
        [-1.2034, -1.3787, -0.4547,  ..., -1.7536,  0.5767, -0.1689],
        ...,
        [-0.6672, -2.6697, -1.7699,  ..., -1.9914, -2.2019, -0.4455],
        [ 0.8897,  2.3477,  4.0818,  ...,  1.1187,  3.0042,  2.0533],
        [-0.2056, -2.8479, -1.3615,  ..., -0.9712, -0.4147,  1.1956]])

In [1]:
import sage_v2
import random_graph
node_df, edge_df = random_graph.random_graph_gcn(1000,3000,nums_features=120,)

In [2]:
a = sage_v2.run_model(node_df,edge_df)

data preprocessing..
data preprocessing complete!
-----------------------*-----------------------
after filtering single nodes
num of train instances: 554
num of test instances: 444
-----------------------*-----------------------
model structure
GraphSage(
  in_features=120, num_neighbors_list=[10, 10]
  (gcn): ModuleList(
    (0): SageGCN(
      in_features=120, out_features=128, aggr_hidden_method=sum
      (aggregator): NeighborAggregator(in_features=120, out_features=128, aggr_method=mean)
      (linear_1): Linear(in_features=128, out_features=256, bias=True)
      (linear_2): Linear(in_features=256, out_features=128, bias=True)
      (bn_1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn_2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): SageGCN(
      in_features=128, out_features=2, aggr_hidden_method=sum
      (aggregator): NeighborAggregator(in_features=128, out_features=2, aggr_method

In [3]:
a.train()

model training..
training through cpu
Epoch 000 Batch 000 Loss: 5.6416
Epoch 000 Batch 001 Loss: 1.1078
Epoch 000 Batch 002 Loss: 10.3123
Epoch 000 Batch 003 Loss: 10.4840
Epoch 000 Batch 004 Loss: 6.2598
Epoch 000 Batch 005 Loss: 7.8102
Epoch 000 Batch 006 Loss: 7.3508
Epoch 000 Batch 007 Loss: 10.1797
Epoch 000 Batch 008 Loss: 10.5731
Epoch 000 Batch 009 Loss: 0.3671
Epoch 000 Batch 010 Loss: 8.9212
Epoch 000 Batch 011 Loss: 5.5620
Epoch 000 Batch 012 Loss: 13.9099
Epoch 000 Batch 013 Loss: 16.8648
Epoch 000 Batch 014 Loss: 6.6336
Epoch 000 Batch 015 Loss: 6.3487
Epoch 000 Batch 016 Loss: 7.3861
Epoch 000 Batch 017 Loss: 15.7320
Epoch 000 Batch 018 Loss: 5.5056
Epoch 000 Batch 019 Loss: 4.6191
-----------------------*-----------------------
test accuracy:  0.5585585832595825
-----------------------*-----------------------
Epoch 001 Batch 000 Loss: 7.7747
Epoch 001 Batch 001 Loss: 9.6212
Epoch 001 Batch 002 Loss: 1.7640
Epoch 001 Batch 003 Loss: 4.1173
Epoch 001 Batch 004 Loss: 7.2695

Epoch 010 Batch 015 Loss: 0.2363
Epoch 010 Batch 016 Loss: 0.0563
Epoch 010 Batch 017 Loss: 0.0241
Epoch 010 Batch 018 Loss: 0.0394
Epoch 010 Batch 019 Loss: 0.0473
-----------------------*-----------------------
test accuracy:  0.5855855941772461
-----------------------*-----------------------
Epoch 011 Batch 000 Loss: 0.0614
Epoch 011 Batch 001 Loss: 0.0214
Epoch 011 Batch 002 Loss: 0.1221
Epoch 011 Batch 003 Loss: 0.0376
Epoch 011 Batch 004 Loss: 0.3028
Epoch 011 Batch 005 Loss: 0.0288
Epoch 011 Batch 006 Loss: 0.0332
Epoch 011 Batch 007 Loss: 0.0786
Epoch 011 Batch 008 Loss: 0.1162
Epoch 011 Batch 009 Loss: 0.1305
Epoch 011 Batch 010 Loss: 0.1637
Epoch 011 Batch 011 Loss: 0.0249
Epoch 011 Batch 012 Loss: 0.2756
Epoch 011 Batch 013 Loss: 0.0527
Epoch 011 Batch 014 Loss: 0.0351
Epoch 011 Batch 015 Loss: 0.0299
Epoch 011 Batch 016 Loss: 0.1167
Epoch 011 Batch 017 Loss: 0.0092
Epoch 011 Batch 018 Loss: 0.0226
Epoch 011 Batch 019 Loss: 0.1197
-----------------------*--------------------

In [1]:
import sage_v2_cora
from data import CoraData

In [2]:
# Load the Cora dataset through the project's CoraData helper.
# NOTE(review): data_root is an absolute, machine-specific path; it will not
# resolve on another machine unless the same directory layout exists there.
# Consider a configurable or relative data directory before sharing.
data = CoraData(data_root="/Users/shuaihengxiao/Desktop/graphSAGE_v0/data/cora").data

Using Cached file: /Users/shuaihengxiao/Desktop/graphSAGE_v0/data/cora/ch7_cached.pkl


In [3]:
# Build the GraphSAGE runner for the Cora data loaded above. The printed
# "model structure" below echoes these settings: hidden_dim gives the
# out_features of the two SageGCN layers (128, then 7), and
# num_neighbors_list / the aggregation methods appear verbatim.
# The returned object exposes .train(), which the next cell calls.
a = sage_v2_cora.run_model(data,
                            hidden_dim = [128,7],
                            num_neighbors_list=[20, 10],
                            aggr_neighbor_method = 'mean',
                            aggr_hidden_method = 'sum',
                            batch_size = 64,
                            epochs = 30,
                            num_batch_per_epoch = 32,
                            lr=1e-4,
                            residual_block=True,)

data preprocessing..
data preprocessing complete!
-----------------------*-----------------------
after filtering single nodes
num of train instances: 1000
num of test instances: 140
-----------------------*-----------------------
model structure
GraphSage(
  in_features=1433, num_neighbors_list=[20, 10]
  (gcn): ModuleList(
    (0): SageGCN(
      in_features=1433, out_features=128, aggr_hidden_method=sum
      (aggregator): NeighborAggregator(in_features=1433, out_features=128, aggr_method=mean)
      (dropout1): Dropout(p=0.4, inplace=False)
      (linear_1): Linear(in_features=128, out_features=256, bias=True)
      (linear_2): Linear(in_features=256, out_features=128, bias=True)
      (bn_1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn_2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): SageGCN(
      in_features=128, out_features=7, aggr_hidden_method=sum
      (aggregator): NeighborAgg

In [4]:
a.train()

model training..
training through cpu
Epoch 000 Batch 000 Loss: 8.6681
Epoch 000 Batch 001 Loss: 7.5369
Epoch 000 Batch 002 Loss: 6.8348
Epoch 000 Batch 003 Loss: 7.8554
Epoch 000 Batch 004 Loss: 7.5405
Epoch 000 Batch 005 Loss: 8.5050
Epoch 000 Batch 006 Loss: 8.4968
Epoch 000 Batch 007 Loss: 8.1036
Epoch 000 Batch 008 Loss: 8.8254
Epoch 000 Batch 009 Loss: 7.3863
Epoch 000 Batch 010 Loss: 9.3827
Epoch 000 Batch 011 Loss: 8.4671
Epoch 000 Batch 012 Loss: 7.6952
Epoch 000 Batch 013 Loss: 7.8918
Epoch 000 Batch 014 Loss: 7.7317
Epoch 000 Batch 015 Loss: 6.3541
Epoch 000 Batch 016 Loss: 7.9998
Epoch 000 Batch 017 Loss: 6.8887
Epoch 000 Batch 018 Loss: 6.9706
Epoch 000 Batch 019 Loss: 6.8206
Epoch 000 Batch 020 Loss: 6.8563
Epoch 000 Batch 021 Loss: 7.7420
Epoch 000 Batch 022 Loss: 6.4108
Epoch 000 Batch 023 Loss: 5.3994
Epoch 000 Batch 024 Loss: 6.6458
Epoch 000 Batch 025 Loss: 7.0993
Epoch 000 Batch 026 Loss: 6.7405
Epoch 000 Batch 027 Loss: 8.0373
Epoch 000 Batch 028 Loss: 5.5541
Epoch

Epoch 007 Batch 000 Loss: 0.7704
Epoch 007 Batch 001 Loss: 0.5516
Epoch 007 Batch 002 Loss: 0.7092
Epoch 007 Batch 003 Loss: 0.7262
Epoch 007 Batch 004 Loss: 0.5840
Epoch 007 Batch 005 Loss: 0.7649
Epoch 007 Batch 006 Loss: 0.7110
Epoch 007 Batch 007 Loss: 0.5827
Epoch 007 Batch 008 Loss: 0.7422
Epoch 007 Batch 009 Loss: 0.4716
Epoch 007 Batch 010 Loss: 0.6803
Epoch 007 Batch 011 Loss: 0.7611
Epoch 007 Batch 012 Loss: 0.4990
Epoch 007 Batch 013 Loss: 0.6994
Epoch 007 Batch 014 Loss: 0.6018
Epoch 007 Batch 015 Loss: 0.7227
Epoch 007 Batch 016 Loss: 0.6197
Epoch 007 Batch 017 Loss: 0.5511
Epoch 007 Batch 018 Loss: 0.6000
Epoch 007 Batch 019 Loss: 0.8131
Epoch 007 Batch 020 Loss: 0.5545
Epoch 007 Batch 021 Loss: 0.7054
Epoch 007 Batch 022 Loss: 0.6485
Epoch 007 Batch 023 Loss: 0.5040
Epoch 007 Batch 024 Loss: 0.7603
Epoch 007 Batch 025 Loss: 0.4412
Epoch 007 Batch 026 Loss: 0.5798
Epoch 007 Batch 027 Loss: 0.5671
Epoch 007 Batch 028 Loss: 0.5767
Epoch 007 Batch 029 Loss: 0.7498
Epoch 007 

Epoch 014 Batch 000 Loss: 0.1723
Epoch 014 Batch 001 Loss: 0.2446
Epoch 014 Batch 002 Loss: 0.2148
Epoch 014 Batch 003 Loss: 0.2099
Epoch 014 Batch 004 Loss: 0.2443
Epoch 014 Batch 005 Loss: 0.2844
Epoch 014 Batch 006 Loss: 0.2938
Epoch 014 Batch 007 Loss: 0.2522
Epoch 014 Batch 008 Loss: 0.2301
Epoch 014 Batch 009 Loss: 0.1906
Epoch 014 Batch 010 Loss: 0.2854
Epoch 014 Batch 011 Loss: 0.1978
Epoch 014 Batch 012 Loss: 0.3398
Epoch 014 Batch 013 Loss: 0.2952
Epoch 014 Batch 014 Loss: 0.2285
Epoch 014 Batch 015 Loss: 0.2153
Epoch 014 Batch 016 Loss: 0.3226
Epoch 014 Batch 017 Loss: 0.3211
Epoch 014 Batch 018 Loss: 0.2460
Epoch 014 Batch 019 Loss: 0.2076
Epoch 014 Batch 020 Loss: 0.3138
Epoch 014 Batch 021 Loss: 0.2266
Epoch 014 Batch 022 Loss: 0.1413
Epoch 014 Batch 023 Loss: 0.1895
Epoch 014 Batch 024 Loss: 0.2068
Epoch 014 Batch 025 Loss: 0.2969
Epoch 014 Batch 026 Loss: 0.2600
Epoch 014 Batch 027 Loss: 0.3257
Epoch 014 Batch 028 Loss: 0.2738
Epoch 014 Batch 029 Loss: 0.2432
Epoch 014 

Epoch 021 Batch 000 Loss: 0.1655
Epoch 021 Batch 001 Loss: 0.1434
Epoch 021 Batch 002 Loss: 0.0839
Epoch 021 Batch 003 Loss: 0.1347
Epoch 021 Batch 004 Loss: 0.0991
Epoch 021 Batch 005 Loss: 0.0903
Epoch 021 Batch 006 Loss: 0.1000
Epoch 021 Batch 007 Loss: 0.0809
Epoch 021 Batch 008 Loss: 0.0943
Epoch 021 Batch 009 Loss: 0.0917
Epoch 021 Batch 010 Loss: 0.1031
Epoch 021 Batch 011 Loss: 0.1026
Epoch 021 Batch 012 Loss: 0.1891
Epoch 021 Batch 013 Loss: 0.1289
Epoch 021 Batch 014 Loss: 0.0796
Epoch 021 Batch 015 Loss: 0.2134
Epoch 021 Batch 016 Loss: 0.1108
Epoch 021 Batch 017 Loss: 0.1574
Epoch 021 Batch 018 Loss: 0.0994
Epoch 021 Batch 019 Loss: 0.1226
Epoch 021 Batch 020 Loss: 0.0839
Epoch 021 Batch 021 Loss: 0.0912
Epoch 021 Batch 022 Loss: 0.1328
Epoch 021 Batch 023 Loss: 0.1345
Epoch 021 Batch 024 Loss: 0.1057
Epoch 021 Batch 025 Loss: 0.1054
Epoch 021 Batch 026 Loss: 0.1562
Epoch 021 Batch 027 Loss: 0.0826
Epoch 021 Batch 028 Loss: 0.0987
Epoch 021 Batch 029 Loss: 0.1721
Epoch 021 

Epoch 028 Batch 000 Loss: 0.0705
Epoch 028 Batch 001 Loss: 0.0723
Epoch 028 Batch 002 Loss: 0.0498
Epoch 028 Batch 003 Loss: 0.0616
Epoch 028 Batch 004 Loss: 0.0701
Epoch 028 Batch 005 Loss: 0.0478
Epoch 028 Batch 006 Loss: 0.0441
Epoch 028 Batch 007 Loss: 0.0853
Epoch 028 Batch 008 Loss: 0.0673
Epoch 028 Batch 009 Loss: 0.0676
Epoch 028 Batch 010 Loss: 0.0931
Epoch 028 Batch 011 Loss: 0.0641
Epoch 028 Batch 012 Loss: 0.0488
Epoch 028 Batch 013 Loss: 0.0754
Epoch 028 Batch 014 Loss: 0.0557
Epoch 028 Batch 015 Loss: 0.0463
Epoch 028 Batch 016 Loss: 0.0487
Epoch 028 Batch 017 Loss: 0.0295
Epoch 028 Batch 018 Loss: 0.0425
Epoch 028 Batch 019 Loss: 0.0488
Epoch 028 Batch 020 Loss: 0.0663
Epoch 028 Batch 021 Loss: 0.0607
Epoch 028 Batch 022 Loss: 0.0397
Epoch 028 Batch 023 Loss: 0.0732
Epoch 028 Batch 024 Loss: 0.0808
Epoch 028 Batch 025 Loss: 0.1090
Epoch 028 Batch 026 Loss: 0.0561
Epoch 028 Batch 027 Loss: 0.0421
Epoch 028 Batch 028 Loss: 0.0559
Epoch 028 Batch 029 Loss: 0.0407
Epoch 028 

In [7]:
%tensorboard --logdir runs

UsageError: Line magic function `%tensorboard` not found.


In [None]:
0.8928571343421936