In [2]:
from torch.utils import data
from torch.nn.utils import rnn

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
from torch.autograd import Variable
import numpy as np

In [5]:
import networkx as nx

In [8]:
with open('../Dataset/Cora/cora.cites') as fs:
    for line in fs:
        temp = list(map(int,line.strip().split()))
        print(temp[0],temp[1])
        break

35 1033


In [10]:
# construct network
G = nx.Graph()
count = 0
node_list = {} # assign a id of 0 - n (n nodes...)
with open('../Dataset/Cora/cora.cites') as fs:
    for line in fs:
        u,v = map(int,line.strip().split())
        if u not in node_list:
            node_list[u] = count
            count+=1
        if v not in node_list:
            node_list[v] = count
            count+=1
        G.add_edge(node_list[u],node_list[v])    


In [18]:
# obtaining node features...
feature = [[] for i in range(len(node_list))]
label = {}
with open('../Dataset/Cora/cora.content') as fs:
    for line in fs:
        temp = line.strip().split()
        node = node_list[int(temp[0])]
        label = temp[-1]
        feature[node] = list(map(int,temp[1:-1]))

In [19]:
feature = torch.FloatTensor(feature)

In [20]:
feature.shape

torch.Size([2708, 1433])

In [170]:
class Encoder(nn.Module):
    def __init__(self,feature,graph,input_size,hidden_size):
        super(Encoder,self).__init__()
        self.feature = feature
        self.graph = graph
        self.input_size = input_size
        self.hidden_size = hidden_size
        
        self.W_1 = nn.Parameter(torch.rand(input_size,hidden_size))
        self.W_2 = nn.Parameter(torch.rand(input_size,hidden_size))
        
        self.U_1 = nn.Parameter(torch.rand(hidden_size))
        self.U_2 = nn.Parameter(torch.rand(hidden_size))
        
        self.softmax = nn.Softmax(dim=0)

        
    def forward(self,node_sample):
        
        C_n_1_u = torch.matmul(feature,self.W_1)
        
        C_n_2_u = torch.matmul(feature,self.W_2)
        
        s_node_repr = torch.Tensor()
        
        for node in node_sample:
            
            nbr_1 = torch.Tensor()
            nbr_2 = torch.Tensor()
            # obtaining the first hop neighbors
            neighbors_1 = list(nx.neighbors(G,node))
            
            # obtaining the second hop neighbors
            neighbors_2 = []
            for n in neighbors_1:
                nbr_1 = torch.cat((nbr_1,C_n_1_u[n].view(1,-1)),dim=0) # getting the vectors of neighbors
                neighbors_2 += list(nx.neighbors(G,n))
            
            neighbors_2 = list(set(neighbors_2))
            
            for n in neighbors_2:
                nbr_2 = torch.cat((nbr_2,C_n_2_u[n].view(1,-1)),dim=0) # getting vectors of two-hop neighbors
            
            # calculate attention weights for 1-hop neighbors
            
            att_wt_1 = self.softmax(torch.sum(nbr_1*self.U_1,dim=1).view(-1,1))
            att_wt_2 = self.softmax(torch.sum(nbr_2*self.U_2,dim=1).view(-1,1))
            
            output = torch.cat((torch.sum(nbr_1*att_wt_1,dim=0),torch.sum(nbr_2*att_wt_2,dim=0)),dim=0).view(1,-1)
            
            s_node_repr = torch.cat((s_node_repr,output),dim=0)
            
        return s_node_repr    
            

In [53]:
import random
from collections import Counter

In [79]:
def randomWalk(G,node,walk_length = 3,walk_num=10): # finding list of similar nodes
    sim_nodes = Counter()
    for i in range(walk_num):
        l = 0
        c_node = node
        while(l<walk_length):
            n = random.select(list(nx.neighbors(G,c_node)),1)
            sim_nodes[n]+=1
            c_node = n
            l+=1
    return list(sim_nodes)        

In [55]:
def findSimilarNodes(G):
    similar_nodes = {}
    for node in G.nodes():
        similar_nodes[node] = randomWalk(G,node)
    return similar_nodes 

In [88]:
def getSamples(G,sample_nodes,all_edges): # generates positive and negative samples for a given batch of nodes
    pos_sample_edges = list(filter(lambda x:x[0] in sample_nodes and x[1] in sample_nodes,all_edges))
    neg_sample_size = len(pos_sample_edges)
    neg_sample_edges = []
    i=0
    tries = 2*neg_sample_size # check to keep the number of tries finite
    while i<neg_sample_size:
        u,v = random.sample(sample_nodes,2)
        if (u,v) not in pos_sample_edges and (v,u) not in pos_sample_edges:
            neg_sample_edges.append((u,v))
            i+=1
        tries-=1
        if tries==0:
            break
    return pos_sample_edges,neg_sample_edges        

In [146]:
def createBatches(Graph = G,batch_size = 1,node_list=[],edge_list=[]):
    random.shuffle(node_list)
    num_batches = int(len(node_list)/batch_size)+1
    for i in range(num_batches):
        # get nodes for batches...
        if i<num_batches-1:
            sample_nodes = node_list[i*batch_size:(i+1)*batch_size]
        else:
            sample_nodes = node_list[i*batch_size:]

        # find positive and negative samples for a batch
        pos_sample_edges, neg_sample_edges = getSamples(G,sample_nodes,edge_list)
        
        node_dict = {}
        count = 0
        
        for n in sample_nodes:
            node_dict[n] = count
            count+=1
        
        yield pos_sample_edges,neg_sample_edges,sample_nodes,node_dict

In [178]:
def train(encoder, graph=G, batch_size=25, epochs=1, learning_rate=0.001):
    optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    criterion = nn.MSELoss()
    node_list = list(G.nodes())
    edge_list = list(G.edges())
    
    for _ in range(epochs):
        
        for pos_sample, neg_sample, sample_nodes,node_dict in \
        createBatches(Graph=G, batch_size=batch_size, node_list=node_list, edge_list=edge_list):
            
    
            node_repr = encoder(sample_nodes)
            
            
            predicted_vector = torch.Tensor()
            true_label = torch.FloatTensor([1 for i in range(len(pos_sample))]+[0 for i in range
                                                                                   (len(neg_sample))]).view(1,-1)
            
            
            i = 0
            for a,b in pos_sample+neg_sample:
                u = node_dict[a]
                v = node_dict[b]
                pred_val = F.sigmoid(torch.sum(node_repr[u]*node_repr[v])).view(1,1)
                
                predicted_vector = torch.cat((predicted_vector, pred_val),dim=0)
            
            loss = criterion(predicted_vector,true_label)
            
            print('loss: {}'.format(loss))
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            

In [95]:
def getRepresentations(encoder,G):
    nodes = list(G.nodes())
    
    node_repr = encoder(nodes)
    
    return node_repr

In [92]:
feature.shape

torch.Size([2708, 1433])

In [179]:
input_size = feature.shape[1]
hidden_size = 100
encoder = Encoder(feature,G,input_size,hidden_size)
batch_size = 100
train(encoder,graph=G,batch_size=batch_size)

loss: 0.5
loss: 0.5
loss: 0.5
loss: 0.5
loss: 0.5
loss: 0.5
loss: 0.5
loss: 0.5
loss: 0.5
loss: 0.5
loss: 0.5
loss: 0.5
loss: 0.5
loss: 0.5
loss: 0.5
loss: 0.5
loss: 0.5
loss: 0.5
loss: 0.5
loss: 0.5
loss: 0.5
loss: 0.5
loss: 0.5
loss: 0.5


KeyboardInterrupt: 

In [133]:
x = torch.Tensor([[1,2,3],[3,4,5]])
y = torch.Tensor([3,4]).view(-1,1)

In [137]:
print(x*y)
torch.sum(x*y,dim=0)

tensor([[ 3.,  6.,  9.],
        [12., 16., 20.]])


tensor([15., 22., 29.])

In [135]:
x.shape

torch.Size([2, 3])