In [1]:
##### Arif Khan #####
### arif.khan@pnnl.gov ###

import warnings
import igraph as ig
import networkx as nx
import collections
import scipy as sp
import numpy as np
import bMatching as bm
import  Evaluation as ev
import importlib
import matplotlib.pylab as plt
import time
import os
import subprocess
import pandas as pd
import math
import sys
import netAlignPY as na

import sklearn.metrics.pairwise
import pickle as pickle
import scipy.sparse as sps
import argparse
import theano
from theano import tensor as T
from scipy.sparse import csr_matrix, coo_matrix
from sklearn.neighbors import KDTree
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors
import unsup_align
import embedding
import igraph as ig
import karateclub as kt
import importlib
import time

# Re-run the import machinery for the two project modules so that the objects
# bound to `unsup_align` and `na` are refreshed without restarting the kernel.
importlib.reload(unsup_align)
importlib.reload(na)

<module 'netAlignPY' from '/home/khan242/net_align/src/netAlignPY.so'>

In [2]:
def read_networks(f1,f2,fl=None,input_dir=None,gformat='edgelist'):
    """Load the two networks to align and, optionally, the candidate graph L.

    Parameters
    ----------
    f1, f2 : str
        File names of the two input networks.
    fl : str or None
        File name of the graph L. When None, no file is read and None is
        returned in its place (for every format).
    input_dir : str or None
        Prefix prepended to each file name by plain string concatenation,
        so it has to end with a path separator.
    gformat : {'edgelist', 'graphml', 'leda'}
        On-disk format shared by all the files.

    Returns
    -------
    tuple
        ``(G1, G2, L)`` as igraph graphs; ``L`` is None when ``fl`` is None.

    Raises
    ------
    ValueError
        If ``gformat`` is not one of the supported formats.
    """
    if gformat not in ('graphml', 'edgelist', 'leda'):
        raise ValueError("Unsupported graph format: %r" % (gformat,))

    if input_dir is not None:
        f1 = input_dir + f1
        f2 = input_dir + f2
        if fl is not None:
            fl = input_dir + fl

    def _load(path):
        # One loader for all three files keeps the formats handled uniformly.
        if gformat == 'graphml':
            return ig.read(path, format="graphml")
        if gformat == 'edgelist':
            return ig.Graph.Read_Edgelist(path)
        # LEDA: parsed with networkx and handed to igraph through an
        # intermediate GraphML file. 'graph.graphml' is written to the
        # current working directory, overwritten on each call, not removed.
        T = nx.read_leda(path)
        nx.write_graphml(T, 'graph.graphml')
        return ig.read('graph.graphml', format="graphml")

    G1 = _load(f1)
    G2 = _load(f2)
    L = _load(fl) if fl is not None else None

    return G1, G2, L

In [3]:
def get_alignment(G1,G2,M):
    """Translate matched vertex indices into pairs of vertex 'id' attributes.

    Parameters
    ----------
    G1, G2 : graph objects whose ``vs[k]['id']`` yields the id of vertex k.
    M : iterable of (i, j)
        Index pairs: ``i`` indexes ``G1.vs`` and ``j`` indexes ``G2.vs``.

    Returns
    -------
    set
        The set of ``(id_in_G1, id_in_G2)`` tuples, one per pair in ``M``.
    """
    vs1 = G1.vs
    vs2 = G2.vs
    return {(vs1[i]['id'], vs2[j]['id']) for (i, j) in M}

In [4]:
def save_alignment(L,filename):
    """Write an alignment to a text file, one "u v" pair per line.

    Parameters
    ----------
    L : iterable of (u, v)
        Aligned pairs; each element is converted with ``str`` before
        writing, so non-string ids are accepted too.
    filename : str
        Destination path; an existing file is overwritten.
    """
    # The context manager closes the file even if a write fails midway.
    with open(filename, "w") as out:
        for (u, v) in L:
            out.write(str(u) + " " + str(v) + "\n")

In [5]:
def evaluate(AQ,G1,G2,AL):
    """Score an alignment with an already-initialised quality evaluator.

    Parameters
    ----------
    AQ : evaluator object
        Must expose ``mapping_set``, ``true_mapping_set`` and an
        ``evaluate`` method taking ten positional boolean flags.
        NOTE(review): the meaning of each flag is defined by the evaluator
        class and is not visible here; presumably the 6th flag enables
        "NCV-GS3" and the 3rd enables "F-NC" -- confirm.
    G1, G2 : unused; kept for interface compatibility.
    AL : the alignment to score; stored on ``AQ.mapping_set``.

    Returns
    -------
    tuple
        ``(score, qual)`` where ``qual`` is whatever ``AQ.evaluate``
        returned. ``score`` is ``qual["NCV-GS3"]`` when no true mapping is
        available, otherwise ``float(qual["F-NC"])``.
    """
    AQ.mapping_set = AL

    # Identity test: `== None` would defer to the object's own __eq__ and
    # can pick the wrong branch (or be ambiguous for array-like values).
    if AQ.true_mapping_set is None:
        qual = AQ.evaluate(False,False,False,False,False,True,False,False,False,False)
        score = qual["NCV-GS3"]
    else:
        qual = AQ.evaluate(False,False,True,False,False,True,False,False,False,False)
        score = float(qual["F-NC"])

    return score, qual

In [6]:
def igraph2mtx(G,n,fname='graph.mtx',bipartite=False):
    """Write a graph as a MatrixMarket "coordinate real general" file.

    Parameters
    ----------
    G : graph object
        Must provide ``get_edgelist()``, ``vcount()`` and ``es['weight']``
        (a missing 'weight' attribute means every edge has weight 1).
    n : int
        Non-bipartite: the matrix is ``n x n``.
        Bipartite: number of left-side vertices; vertices ``0..n-1`` are
        rows and vertices ``n..vcount-1`` are columns (re-based to start
        at 1 in the file).
    fname : str
        Output path; overwritten if it exists.
    bipartite : bool
        When False every edge is written in both directions (2*m entries);
        when True every edge is written once as (left, right).
    """
    edges = G.get_edgelist()
    m = len(edges)

    try:
        weights = G.es['weight']
    except KeyError:
        # No 'weight' edge attribute: treat the graph as unweighted.
        weights = [1]*m

    if bipartite:
        nl = n
        nr = G.vcount() - nl
        entries = m
    else:
        nl = n
        nr = n
        entries = m*2

    # The context manager guarantees the file is closed if a write fails.
    with open(fname, 'w') as f:
        f.write('%%MatrixMarket matrix coordinate real general' + "\n")
        f.write('% Generated' + "\n")
        f.write(str(nl)+" "+str(nr)+" "+str(entries)+"\n")

        for (u, v), w in zip(edges, weights):
            if bipartite:
                # An edge stored as (right, left) would otherwise yield a
                # row index beyond nl and a non-positive column index.
                if u >= nl and v < nl:
                    u, v = v, u
                f.write(str(u+1)+" "+str(v-nl+1)+" "+str(w)+"\n")
            else:
                # Coordinate format is "general", so the symmetric entry
                # has to be written explicitly.
                f.write(str(u+1)+" "+str(v+1)+" "+str(w)+"\n")
                f.write(str(v+1)+" "+str(u+1)+" "+str(w)+"\n")

In [7]:
def igraph2nx(G):
    """Return an undirected networkx graph mirroring ``G``.

    Nodes are the integers ``0 .. G.vcount()-1`` and edges are the index
    pairs reported by ``G.get_edgelist()``. Only the topology is copied;
    no vertex or edge attributes are read from ``G``.
    """
    converted = nx.Graph()

    node_indices = range(G.vcount())
    converted.add_nodes_from(node_indices)

    edge_pairs = G.get_edgelist()
    converted.add_edges_from(edge_pairs)

    return converted

In [8]:
def get_embedding_cone(G1,G2):
    """Embed both graphs with ``embedding.netmf`` and align the first to the second.

    Steps, as performed below:
      1. build a dense float adjacency matrix for each graph,
      2. embed each with ``embedding.netmf``,
      3. obtain an initial similarity from ``unsup_align.convex_init_sparse``
         (given the embeddings and CSR adjacencies),
      4. obtain a matrix from ``unsup_align.align`` and right-multiply the
         first embedding by it.

    Returns
    -------
    tuple of numpy.ndarray
        ``(aligned embedding of G1, embedding of G2)``.
    """
    def _dense_adjacency(graph):
        # Dense float adjacency of an igraph graph, via networkx.
        return nx.adjacency_matrix(igraph2nx(graph)).todense().astype(float)

    dense_a = _dense_adjacency(G1)
    dense_b = _dense_adjacency(G2)

    emb_a = embedding.netmf(dense_a)
    emb_b = embedding.netmf(dense_b)

    sparse_a = csr_matrix(dense_a)
    sparse_b = csr_matrix(dense_b)

    # The second return value of both calls is not used here.
    initial_similarity, _ = unsup_align.convex_init_sparse(
        emb_a, emb_b, K_X = sparse_a, K_Y = sparse_b)
    alignment_matrix, _ = unsup_align.align(emb_a, emb_b, initial_similarity)

    emb_a_aligned = emb_a.dot(alignment_matrix)
    return np.array(emb_a_aligned), np.array(emb_b)

In [9]:
def get_embedding_gwave(G1,G2):
    """Embed both graphs with karateclub's GraphWave and align the first to the second.

    Each igraph graph is converted to networkx once; that networkx graph
    is what GraphWave is fitted on (karateclub estimators take NetworkX
    graphs, not igraph ones) and what the sparse adjacency is built from.

    Returns
    -------
    tuple of numpy.ndarray
        ``(aligned embedding of G1, embedding of G2)``.
    """
    Gx1 = igraph2nx(G1)
    Gx2 = igraph2nx(G2)

    # Same fixed seed for both models, as in the original code.
    gv = kt.GraphWave(seed=947)
    gv.fit(Gx1)
    embed1 = gv.get_embedding()

    gv = kt.GraphWave(seed=947)
    gv.fit(Gx2)
    embed2 = gv.get_embedding()

    # Only the CSR form is needed here, so skip the dense intermediate.
    adj1 = csr_matrix(nx.adjacency_matrix(Gx1).astype(float))
    adj2 = csr_matrix(nx.adjacency_matrix(Gx2).astype(float))

    # The second return value of both calls is not used here.
    init_sim, corr_mat = unsup_align.convex_init_sparse(embed1, embed2, K_X = adj1, K_Y = adj2)
    dim_align_matrix, corr_mat = unsup_align.align(embed1, embed2, init_sim)
    aligned_embed1 = embed1.dot(dim_align_matrix)

    return np.array(aligned_embed1), np.array(embed2)

In [10]:
def create_L(G1,G2,kfactor=.10,algo='cone'):
    """Build the weighted bipartite candidate graph L between G1 and G2.

    Both graphs are embedded, then a pair ``(u in G1, v in G2)`` becomes
    an edge only when the two are *mutual* nearest neighbours: ``v`` is
    among the K2 nearest G2 vertices of ``u`` and ``u`` is among the K1
    nearest G1 vertices of ``v``. The edge weight is
    ``1000 * exp(-(d_uv + d_vu) / 2)`` where the ``d`` values are the two
    reported neighbour distances.

    Parameters
    ----------
    G1, G2 : igraph graphs
    kfactor : float
        Fraction of each graph's vertex count used as the neighbour count
        (K1 from G1, K2 from G2); each K is clamped to at least 1 so small
        graphs do not request zero neighbours.
    algo : {'cone', 'gwave'}
        Which embedding routine to use.

    Returns
    -------
    igraph.Graph
        Vertices ``0..n1-1`` stand for G1 and ``n1..n1+n2-1`` for G2;
        edge attribute 'weight' holds the weights described above.

    Raises
    ------
    ValueError
        If ``algo`` is not a supported embedding name.
    """
    if algo == 'cone':
        feature1, feature2 = get_embedding_cone(G1, G2)
    elif algo == 'gwave':
        feature1, feature2 = get_embedding_gwave(G1, G2)
    else:
        raise ValueError("Unsupported embedding algorithm: %r" % (algo,))

    K1 = max(1, int(feature1.shape[0]*kfactor))
    K2 = max(1, int(feature2.shape[0]*kfactor))

    # NOTE(review): the first embedding column is dropped before the
    # neighbour search, as in the original code; the reason is not visible
    # here -- confirm it is intentional.
    points1 = feature1[:,1:].astype(float)
    points2 = feature2[:,1:].astype(float)

    # For every G2 vertex: its K1 nearest G1 vertices.
    nbrs1 = NearestNeighbors(n_neighbors=K1,algorithm='brute').fit(points1)
    distances1, indices1 = nbrs1.kneighbors(points2)

    # For every G1 vertex: its K2 nearest G2 vertices.
    nbrs2 = NearestNeighbors(n_neighbors=K2,algorithm='brute').fit(points2)
    distances2, indices2 = nbrs2.kneighbors(points1)

    n1 = G1.vcount()
    n2 = G2.vcount()

    # G1 -> G2 candidates, keyed (u, v); insertion order fixes edge order.
    forward = {}
    for u in range(n1):
        for v, w in zip(indices2[u], distances2[u]):
            if math.isnan(w):
                continue
            forward[(u, int(v))] = w

    # G2 -> G1 candidates, stored under the same (u, v) orientation.
    backward = {}
    for v in range(n2):
        for u, w in zip(indices1[v], distances1[v]):
            if math.isnan(w):
                continue
            backward[(int(u), v)] = w

    edges = []
    weights = []
    for (u, v), w_forward in forward.items():
        if (u, v) not in backward:
            # One-directional candidates are discarded.
            continue
        val = (w_forward + backward[(u, v)])*0.5
        val = 1/(math.exp(val))*1000  ### 1000 * exp(-mean distance)
        # G2 vertices are offset by n1 in the bipartite graph.
        edges.append((u, v + n1))
        weights.append(val)

    GL = ig.Graph()
    GL.add_vertices(list(range(n1+n2)))
    GL.add_edges(edges)
    GL.es['weight'] = weights

    return GL

In [11]:
def netAlignEX(f1,f2,fl,alpha=1,beta=1,ft=None,input_dir=None,gformat='edgelist'):
    """Run the full pipeline: read inputs, build L if needed, align, evaluate.

    The graphs are exchanged with ``na.netalign()`` through fixed file
    names in the current working directory: 'graph-A.mtx', 'graph-B.mtx'
    and 'graph-L.mtx' are written before the call and 'out.log' is read
    afterwards (one "i j" pair of 1-based indices per line). All four
    files are removed at the end.

    Parameters
    ----------
    f1, f2 : str
        File names of the two networks.
    fl : str or None
        File name of the candidate graph L; when no L is loaded it is
        generated with ``create_L``.
    alpha, beta :
        Accepted for interface compatibility; not used by this function.
    ft : str or None
        File name of the true node mapping handed to the evaluator.
    input_dir : str or None
        Prefix concatenated in front of every file name; None means no
        prefix.
    gformat : str
        Input format forwarded to ``read_networks``.

    Returns
    -------
    tuple
        ``(AL, s, q)``: the alignment as a set of id pairs, the score and
        the quality dictionary returned by ``evaluate``.
    """
    t1=time.time()
    #### Reading the networks
    G1,G2,L=read_networks(f1,f2,fl,input_dir,gformat=gformat)
    igraph2mtx(G1,G1.vcount(),fname='graph-A.mtx')
    igraph2mtx(G2,G2.vcount(),fname='graph-B.mtx')
    t2=time.time()
    print("Input graph reading done: ", t2-t1, " sec")

    if L is None:
        L = create_L(G1,G2)
    igraph2mtx(L,G1.vcount(),fname='graph-L.mtx',bipartite=True)

    t3=time.time()
    print("Sparse bipartite graph creation done: ", t3-t2, " sec")

    #### Initialize evaluation class
    # An absent input_dir means "no prefix"; concatenating None would raise.
    prefix = input_dir if input_dir is not None else ""
    true_mapping = prefix+ft if ft is not None else None
    AQ=ev.AlignmentQuality(prefix+f1, prefix+f2, None, true_mapping, None, None)

    na.netalign()

    t4=time.time()
    print("Alignment done: ", t4-t3, " sec")

    #### Assuming the result is in out.log by default
    M=[]
    with open('out.log','r') as f:
        for l in f:
            # split() tolerates repeated blanks and trailing whitespace.
            fields=l.split()
            if not fields:
                continue
            # File indices are 1-based; convert to 0-based vertex indices.
            M.append((int(fields[0])-1,int(fields[1])-1))
    AL=get_alignment(G1,G2,M)
    s,q=evaluate(AQ,G1,G2,AL)

    # Best-effort cleanup of the exchange files; a missing file is not an error.
    for tmp_name in ('out.log','graph-A.mtx','graph-B.mtx','graph-L.mtx'):
        try:
            os.remove(tmp_name)
        except OSError:
            pass

    t5=time.time()
    print("Output processing done: ", t5-t4, " sec")
    print("Total time: ", t5-t1, " sec")
    return AL,s,q

In [13]:
if __name__ == '__main__':
    # Driver: align two LEDA networks, save the alignment and print its score.

    warnings.filterwarnings("ignore")

    input_dir="../data/synthetic_networks/"
    res_dir="../results/"
    resf="res.aln"

    f1="yeast0_Y2H1.gw"
    f2="yeast5_Y2H1.gw"
    fl=None   # no precomputed L: netAlignEX builds it with create_L
    ft="true_node_mapping.txt"
    alpha=1
    beta=1

    # Create the output directory up front so a long run cannot fail at
    # the final save because the directory is missing.
    os.makedirs(res_dir, exist_ok=True)

    AL,s,q=netAlignEX(f1,f2,fl,alpha,beta,ft,input_dir=input_dir,gformat='leda')
    save_alignment(AL,res_dir+resf)
    print('Score: ',s,q)


Input graph reading done:  0.963507890701294  sec
10
Sparse bipartite graph creation done:  7.783968448638916  sec
Alignment done:  131.9514570236206  sec
Output processing done:  0.07504987716674805  sec
Total time:  140.77398324012756  sec
Score:  0.8037848605577689 {'F-NC': 0.8037848605577689, 'NCV-GS3': 0.9644119958490545}


In [None]:
# Show the quality dictionary `q` produced by the netAlignEX run in the driver cell.
print(q)