In [1]:
import pickle
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import ncsr_import
import numpy as np
import dgl
import torch
import torch.nn as nn
import torch.nn.functional as F
import itertools
import numpy as np
import scipy.sparse as sp
from dgl import DGLGraph 
import networkx as nx
#from pyhawkes.models import DiscreteTimeNetworkHawkesModelSpikeAndSlab


Using backend: pytorch


In [2]:
ncsr_age = pd.read_csv('age_subset.csv', index_col=0)

In [3]:
AGO = []
for filename in os.listdir(r'C:\Users\galyn\Documents\GitHub\Seminar\individual_graphs/DSM_AGO'):
    
    nxg= nx.read_gpickle('individual_graphs/DSM_AGO/'+filename)
    for node in nxg.nodes:
            nxg.nodes[node]['name-age'] = [list(ncsr_age.columns).index(node), nxg.nodes[node]['age']]
    dgl_graph = dgl.from_networkx(nxg, node_attrs = ['name-age'])
    AGO.append(dgl_graph)

In [4]:
dbatch = dgl.batch(AGO, ndata=['name-age'])

In [5]:
dbatch.ndata['name-age'].shape

torch.Size([12327, 2])

In [6]:
u, v = dbatch.edges()

In [7]:
eids = np.arange(dbatch.number_of_edges())
eids = np.random.permutation(eids)

In [8]:
test_size = int(len(eids)*.1)

In [9]:
train_size = dbatch.number_of_edges() - test_size

In [10]:
test_pos_u, test_pos_v = u[eids[:test_size]], v[eids[:test_size]]
train_pos_u, train_pos_v = u[eids[test_size:]], v[eids[test_size:]]


In [11]:
adj = sp.coo_matrix((np.ones(len(u)), (u.numpy(), v.numpy())))
adj_neg = 1 - adj.todense() - np.eye(dbatch.number_of_nodes())
neg_u, neg_v = np.where(adj_neg != 0)


In [12]:
neg_eids = np.random.choice(len(neg_u), dbatch.number_of_edges() // 2)
test_neg_u, test_neg_v = neg_u[neg_eids[:test_size]], neg_v[neg_eids[:test_size]]
train_neg_u, train_neg_v = neg_u[neg_eids[test_size:]], neg_v[neg_eids[test_size:]]

In [13]:
train_g = dgl.remove_edges(dbatch, eids[:test_size])

In [14]:
from dgl.nn import SAGEConv

In [15]:
class GraphSAGE(nn.Module):
    def __init__(self, in_feats, h_feats):
        super(GraphSAGE, self).__init__()
        self.conv1 = SAGEConv(in_feats, h_feats, 'lstm')
        self.conv2 = SAGEConv(h_feats, h_feats, 'lstm')
        self.conv3 = SAGEConv(h_feats, h_feats, 'lstm')

    def forward(self, g, in_feat):
        #in_feat = in_feat.view(1, -1)
        h = self.conv1(g, in_feat)
        h = F.relu(h)
        h = self.conv2(g, h)
        h = F.relu(h)
        h = self.conv3(g, h)
        return h

In [16]:
train_pos_g = dgl.graph((train_pos_u, train_pos_v), num_nodes=dbatch.number_of_nodes())
train_neg_g = dgl.graph((train_neg_u, train_neg_v), num_nodes=dbatch.number_of_nodes())

test_pos_g = dgl.graph((test_pos_u, test_pos_v), num_nodes=dbatch.number_of_nodes())
test_neg_g = dgl.graph((test_neg_u, test_neg_v), num_nodes=dbatch.number_of_nodes())

In [22]:
import dgl.function as fn

class MLPPredictor(nn.Module):
    def __init__(self, h_feats):
        super().__init__()
        self.W1 = nn.Linear(h_feats * 2, h_feats)
        self.W2 = nn.Linear(h_feats, 1)

    def apply_edges(self, edges):
        """
        Computes a scalar score for each edge of the given graph.

        Parameters
        ----------
        edges :
            Has three members ``src``, ``dst`` and ``data``, each of
            which is a dictionary representing the features of the
            source nodes, the destination nodes, and the edges
            themselves.

        Returns
        -------
        dict
            A dictionary of new edge features.
        """
        h = torch.cat([edges.src['h'], edges.dst['h']], 1)
        return {'score': self.W2(F.relu(self.W1(h))).squeeze(1)}

    def forward(self, g, h):
        with g.local_scope():
            g.ndata['h'] = h
            g.apply_edges(self.apply_edges)
            return g.edata['score']

In [23]:
class DotPredictor(nn.Module):
    def forward(self, g, h):
        with g.local_scope():
            g.ndata['h'] = h
            # Compute a new edge feature named 'score' by a dot-product between the
            # source node feature 'h' and destination node feature 'h'.
            g.apply_edges(fn.u_dot_v('h', 'h', 'score'))
            # u_dot_v returns a 1-element vector for each edge so you need to squeeze it.
            return g.edata['score'][:, 0]

In [24]:
model = GraphSAGE(train_g.ndata['name-age'].shape[1], 1)
# You can replace DotPredictor with MLPPredictor.
#pred = MLPPredictor(16)
pred = DotPredictor()

def compute_loss(pos_score, neg_score):
    scores = torch.cat([pos_score, neg_score])
    labels = torch.cat([torch.ones(pos_score.shape[0]), torch.zeros(neg_score.shape[0])])
    return F.binary_cross_entropy_with_logits(scores, labels)

def compute_auc(pos_score, neg_score):
    scores = torch.cat([pos_score, neg_score]).numpy()
    labels = torch.cat(
        [torch.ones(pos_score.shape[0]), torch.zeros(neg_score.shape[0])]).numpy()
    return roc_auc_score(labels, scores)

In [25]:
from tqdm import tqdm

In [21]:
# ----------- 3. set up loss and optimizer -------------- #
# in this case, loss will in training loop
optimizer = torch.optim.Adam(itertools.chain(model.parameters(), pred.parameters()), lr=0.01)

# ----------- 4. training -------------------------------- #

auc = []
for r in [2500]:
    for e in tqdm(range(r)):
        # forward
        h = model(train_g, train_g.ndata['name-age'].float())
        pos_score = pred(train_pos_g, h)
        neg_score = pred(train_neg_g, h)
        loss = compute_loss(pos_score, neg_score)

        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if e % 100 == 0:
            print('In epoch {}, loss: {}'.format(e, loss))
            from sklearn.metrics import roc_auc_score
            with torch.no_grad():
                pos_score = pred(test_pos_g, h)
                neg_score = pred(test_neg_g, h)
                auc.append(compute_auc(pos_score, neg_score))
                print('AUC', e, compute_auc(pos_score, neg_score))

    # ----------- 5. check results ------------------------ #
    from sklearn.metrics import roc_auc_score
    with torch.no_grad():
        pos_score = pred(test_pos_g, h)
        neg_score = pred(test_neg_g, h)
        auc.append(compute_auc(pos_score, neg_score))
        print('AUC', r, compute_auc(pos_score, neg_score))

  0%|          | 0/2500 [00:00<?, ?it/s]In epoch 0, loss: 1.6794089078903198
  0%|          | 1/2500 [00:05<3:53:46,  5.61s/it]AUC 2500 0.5309913334485638
  0%|          | 7/2500 [00:39<3:54:08,  5.63s/it]


KeyboardInterrupt: 

In [490]:
with torch.no_grad():
    pos_score = pred(test_pos_g, h)
    neg_score = pred(test_neg_g, h)

In [498]:
auc

[0.5266671557313184,
 0.5130468142528873,
 0.5178638052730741,
 0.502648491860656,
 0.4916148864410682]

In [485]:
len(test_neg_g.edges()[1])

40942

In [456]:
neg_score

tensor([1407.7152,  354.8364, 2023.3774,  ...,  983.2585,   61.1702,
        6064.9575])

In [457]:
pos_score

tensor([3183.1089, -528.0140, 1196.3634,  ...,  929.7194, 5977.1855,
        -576.3859])