In [1]:
import pandas as pd 
import os
import numpy as np
import sklearn.metrics.pairwise
import networkx as nx
import pickle as pickle
import scipy.sparse as sps
import argparse
import time
import theano
from theano import tensor as T
from scipy.sparse import csr_matrix, coo_matrix
from sklearn.neighbors import KDTree
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors
import unsup_align
import embedding
import math
import igraph as ig
import karateclub as kt
import importlib

importlib.reload(unsup_align)

<module 'unsup_align' from '/home/khan242/netAlignPY/src/unsup_align.py'>

In [16]:
def igraph2mtx(G,n,fname='graph.mtx',bipartite=False):
    """Write a graph to a MatrixMarket coordinate file.

    Parameters
    ----------
    G : graph object
        Must provide ``get_edgelist()``, ``es[...]`` attribute access and
        ``vcount()`` (the callers in this notebook pass an igraph ``Graph``).
    n : int
        Number of rows written in the size line. In bipartite mode it is also
        the number of "left" vertices; vertex ids ``>= n`` are treated as
        "right" vertices and shifted down by ``n`` to become column indices.
    fname : str
        Path of the file to write (overwritten if it exists).
    bipartite : bool
        If True, each edge is written once as ``left right weight`` and the
        size line is ``n  vcount-n  m``. Otherwise each edge is written in both
        directions and the size line is ``n  n  2*m``.

    Notes
    -----
    * Indices in the file are 1-based.
    * If the graph has no ``weight`` edge attribute every edge gets weight 1.
    * In bipartite mode an edge stored as (right, left) is flipped before
      writing so the row index is always the left endpoint.
    """
    edge_list=G.get_edgelist()
    m=len(edge_list)

    # Only a missing attribute should trigger the unit-weight fallback; any
    # other failure is a real error and must surface.
    try:
        weights=G.es['weight']
    except KeyError:
        weights=[1]*m

    nl=n
    if bipartite:
        nr=G.vcount()-nl
        nnz=m
    else:
        nr=n
        nnz=m*2  # every undirected edge is emitted in both directions

    # The context manager guarantees the file is closed even if a write fails.
    with open(fname,'w') as f:
        f.write('%%MatrixMarket matrix coordinate real general'+"\n")
        f.write('% Generated'+"\n")
        f.write(str(nl)+" "+str(nr)+" "+str(nnz)+"\n")

        for (u,v),w in zip(edge_list,weights):
            if bipartite:
                # Make sure u is the left endpoint and v the right one.
                if u>=nl and v<nl:
                    u,v=v,u
                v=v-nl
                f.write(str(u+1)+" "+str(v+1)+" "+str(w)+"\n")
            else:
                f.write(str(u+1)+" "+str(v+1)+" "+str(w)+"\n")
                f.write(str(v+1)+" "+str(u+1)+" "+str(w)+"\n")

In [12]:
# Directory that holds the input networks. The loader functions in this
# notebook build file paths by plain string concatenation (input_dir+f1), so
# the trailing "/" is required.
#input_dir="/Users/khan242/PNNL/netAlign/data/synthetic_networks/"
input_dir="/home/khan242/netAlign/data/synthetic_networks/"
#res_dir="/Users/khan242/PNNL/netAlign/src/"




# File names (relative to input_dir) of the two networks to align.
f1="yeast0_Y2H1.gw"
f2="yeast5_Y2H1.gw"

# Alternative inputs, currently disabled.
#f1="star.g"
#f2="email-Enron.txt"



In [3]:
def complete_bipartite(f1,f2,input_dir=None):
    """Build the complete bipartite graph between the nodes of two networks.

    Both files are read as edge lists into directed networkx graphs; only
    their node counts ``n1`` and ``n2`` are used. The returned igraph graph
    has ids ``0..n1-1`` for the first network and ``n1..n1+n2-1`` for the
    second, one edge for every (first, second) pair, and edge attribute
    ``weight`` set to 1 on all edges.

    ``input_dir`` (if given) is prepended to both file names by string
    concatenation.
    """
    if input_dir is None:
        path1, path2 = f1, f2
    else:
        path1, path2 = input_dir+f1, input_dir+f2

    left_graph = nx.read_edgelist(path1, create_using=nx.DiGraph)
    right_graph = nx.read_edgelist(path2, create_using=nx.DiGraph)
    left_count = left_graph.number_of_nodes()
    right_count = right_graph.number_of_nodes()

    left_ids = list(range(left_count))
    right_ids = list(range(left_count, left_count+right_count))

    B = ig.Graph()
    B.add_vertices(left_ids)
    B.add_vertices(right_ids)

    # Every left vertex is connected to every right vertex.
    B.add_edges([(i, j) for i in left_ids for j in right_ids])
    B.es["weight"] = 1

    return B

    

In [4]:
def kd_align(emb1, emb2, normalize=False, distance_metric="euclidean", num_top=10):
    """Build a sparse similarity matrix from k-nearest-neighbour queries.

    For every row of ``emb1`` the ``num_top`` closest rows of ``emb2`` are
    looked up in a KD-tree and stored with similarity ``exp(-distance)``.

    Parameters
    ----------
    emb1, emb2 : 2-D arrays
        Query and target embeddings (one row per node).
    normalize : bool
        Accepted for interface compatibility; not used by this function.
    distance_metric : str
        Metric handed to ``KDTree``.
    num_top : int
        Number of neighbours kept per query row. Clamped to the number of
        rows in ``emb2`` so that small target sets do not make the query fail.

    Returns
    -------
    scipy.sparse CSR matrix of shape ``(emb1.shape[0], emb2.shape[0])``.
    """
    n_query = emb1.shape[0]
    n_target = emb2.shape[0]
    k = min(num_top, n_target)

    kd_tree = KDTree(emb2, metric=distance_metric)
    dist, ind = kd_tree.query(emb1, k=k)
    print("queried alignments")

    # Row index i repeated k times for each query row; built in one shot
    # instead of growing an array with concatenate inside a loop.
    row = np.repeat(np.arange(n_query), k)
    col = ind.flatten()
    data = np.exp(-dist).flatten()

    sparse_align_matrix = coo_matrix((data, (row, col)), shape=(n_query, n_target))
    return sparse_align_matrix.tocsr()

In [5]:
def get_embedding(f1, f2, input_dir=None):
    """Embed two LEDA-format networks and align the first embedding to the second.

    Each file is read with ``nx.read_leda``, turned into a dense float
    adjacency matrix and embedded with ``embedding.netmf``. The embedding
    spaces are then aligned through ``unsup_align.convex_init_sparse``
    followed by ``unsup_align.align``, and the first embedding is multiplied
    by the resulting matrix.

    ``input_dir`` (if given) is prepended to both file names by string
    concatenation.

    Returns ``(G1, G2, aligned_embed1, embed2)`` with both embeddings converted
    to numpy arrays. The type and shape of the first embedding and its first
    row are printed as a side effect.
    """
    def _load(path):
        # Graph plus its dense float adjacency matrix.
        graph = nx.read_leda(path)
        return graph, nx.adjacency_matrix(graph).todense().astype(float)

    path1 = f1 if input_dir is None else input_dir+f1
    path2 = f2 if input_dir is None else input_dir+f2

    G1, dense_a = _load(path1)
    G2, dense_b = _load(path2)

    embed1 = embedding.netmf(dense_a)
    embed2 = embedding.netmf(dense_b)
    print(type(embed1),embed1.shape)
    print(embed1[0])

    sparse_a = csr_matrix(dense_a)
    sparse_b = csr_matrix(dense_b)

    init_sim, corr_mat = unsup_align.convex_init_sparse(
        embed1, embed2, K_X = sparse_a, K_Y = sparse_b
    )
    dim_align_matrix, corr_mat = unsup_align.align(embed1, embed2, init_sim)

    return G1, G2, np.array(embed1.dot(dim_align_matrix)), np.array(embed2)

In [6]:
def get_embedding_edgelist(f1, f2, input_dir=None):
    """Edge-list variant of the NetMF embedding + alignment pipeline.

    Same steps as the LEDA loader in this notebook, except that both files
    are read with ``nx.read_edgelist`` into directed graphs.

    ``input_dir`` (if given) is prepended to both file names by string
    concatenation.

    Returns ``(G1, G2, aligned_embed1, embed2)`` with both embeddings converted
    to numpy arrays. The type and shape of the first embedding and its first
    row are printed as a side effect.
    """
    paths = [f1, f2]
    if input_dir is not None:
        paths = [input_dir+name for name in paths]

    graphs = []
    dense_adjs = []
    for path in paths:
        graph = nx.read_edgelist(path,create_using=nx.DiGraph)
        graphs.append(graph)
        dense_adjs.append(nx.adjacency_matrix(graph).todense().astype(float))
    G1, G2 = graphs
    adjA, adjB = dense_adjs

    embed1 = embedding.netmf(adjA)
    embed2 = embedding.netmf(adjB)
    print(type(embed1),embed1.shape)
    print(embed1[0])

    # Sparse copies of the adjacency matrices are what the initialiser expects
    # for K_X / K_Y in the original call.
    init_sim, corr_mat = unsup_align.convex_init_sparse(
        embed1, embed2, K_X = csr_matrix(adjA), K_Y = csr_matrix(adjB)
    )
    dim_align_matrix, corr_mat = unsup_align.align(embed1, embed2, init_sim)
    rotated = embed1.dot(dim_align_matrix)

    return G1, G2, np.array(rotated), np.array(embed2)

In [7]:
def get_embedding_gwave(f1, f2, input_dir=None):
    """GraphWave variant of the embedding + alignment pipeline.

    Both files are read with ``nx.read_edgelist`` into undirected graphs.
    For each, an integer-relabelled copy is used to compute the dense float
    adjacency matrix and to fit a ``kt.GraphWave(seed=947)`` model. The two
    embeddings are aligned with ``unsup_align.convex_init_sparse`` followed by
    ``unsup_align.align``.

    ``input_dir`` (if given) is prepended to both file names by string
    concatenation.

    Returns ``(G1, G2, aligned_embed1, embed2)`` where ``G1``/``G2`` are the
    graphs as read (original labels), not the relabelled copies. The type and
    shape of the first embedding and its first row are printed as a side effect.
    """
    def _embed(path):
        # Read, relabel, then build adjacency and GraphWave embedding.
        #graph=nx.read_leda(path)
        graph = nx.read_edgelist(path,create_using=nx.Graph)
        relabelled = nx.convert_node_labels_to_integers(graph)
        dense = nx.adjacency_matrix(relabelled).todense().astype(float)
        model = kt.GraphWave(seed=947)
        model.fit(relabelled)
        return graph, dense, model.get_embedding()

    if input_dir is None:
        path1, path2 = f1, f2
    else:
        path1, path2 = input_dir+f1, input_dir+f2

    G1, adjA, embed1 = _embed(path1)
    G2, adjB, embed2 = _embed(path2)

    print(type(embed1),embed1.shape)
    print(embed1[0])

    adj1 = csr_matrix(adjA)
    adj2 = csr_matrix(adjB)

    init_sim, corr_mat = unsup_align.convex_init_sparse(embed1, embed2, K_X = adj1, K_Y = adj2)
    dim_align_matrix, corr_mat = unsup_align.align(embed1, embed2, init_sim)

    return G1, G2, np.array(embed1.dot(dim_align_matrix)), np.array(embed2)

In [8]:
def get_matching(G1,G2, emb1,emb2):
    """Match every node of G1 to its highest-scoring candidate in G2.

    A sparse similarity matrix is obtained from ``kd_align(emb1, emb2)``. For
    each row, the column holding the largest stored value is selected as the
    counterpart. Row/column indices are translated to node labels using the
    iteration order of ``G1.nodes(data=True)`` and ``G2.nodes(data=True)``.

    Prints the number of matches and the number of distinct G2 targets, and
    returns a list of ``(g1_label, g2_label)`` pairs, one per row.
    """
    similarity = kd_align(emb1, emb2)

    best_target = {}
    for src in range(similarity.shape[0]):
        _, cand_cols, cand_vals = sps.find(similarity[src])
        # Candidates ordered by ascending similarity; the last one wins.
        ranked = cand_cols[cand_vals.argsort()]
        best_target[src] = ranked[-1]

    labels1 = [entry[0] for entry in G1.nodes(data=True)]
    labels2 = [entry[0] for entry in G2.nodes(data=True)]

    targets = list(best_target.values())
    print('Match: ',len(targets),len(set(targets)))

    return [(labels1[src], labels2[dst]) for src, dst in best_target.items()]

In [9]:
def get_score(AL):
    """Evaluate the stored yeast0/yeast5 alignment file and return its score.

    Parameters
    ----------
    AL
        Currently ignored: the alignment is always read from the hard-coded
        file ``cone_yeast0_yeast5_Y2H1_0.aln`` inside ``res_dir``. The
        parameter is kept so existing calls keep working.

    Returns
    -------
    The ``"NCV-GS3"`` entry of the evaluation result when the evaluator has no
    true mapping, otherwise ``float`` of the ``"F-NC"`` entry. The score and
    the full result are also printed.

    NOTE(review): ``ev`` is not imported in the visible part of this notebook;
    the evaluation module must be bound to that name before calling.
    """
    input_dir="/home/khan242/netAlign/data/synthetic_networks/"
    res_dir="/home/khan242/netAlign/results/synthetic_networks/danai/"

    f1="yeast0_Y2H1.gw"
    f2="yeast5_Y2H1.gw"
    ft="true_node_mapping.txt"  # set to None to evaluate without ground truth

    # Separate name so the (unused) argument is no longer silently overwritten.
    aln_file="cone_yeast0_yeast5_Y2H1_0.aln"

    #### Initialize evaluation class
    true_mapping_path = input_dir+ft if ft is not None else None
    AQ=ev.AlignmentQuality(input_dir+f1, input_dir+f2, res_dir+aln_file, true_mapping_path, None, None)

    # Identity check: `== None` can be hijacked by a custom __eq__.
    if AQ.true_mapping_set is None:
        qual=AQ.evaluate(False,False,False,False,False,True,False,False,False,False)
        score=qual["NCV-GS3"]
    else:
        qual=AQ.evaluate(False,False,True,False,False,True,False,False,False,False)
        score=float(qual["F-NC"])

    print('Score: ',score, qual)
    return score

In [18]:

# ---------------------------------------------------------------------------
# Build the sparse bipartite candidate graph between G1 and G2 and write it to
# 'test.mtx'.
#
# A pair (u in G1, v in G2) is kept only when it is *mutual*: v is among the
# K2 nearest G2-neighbours of u AND u is among the K1 nearest G1-neighbours of
# v. Its weight is 1000 * exp(-(d_uv + d_vu) / 2), where d_uv / d_vu are the
# distances reported by the two neighbour searches.
# ---------------------------------------------------------------------------
G1,G2,feature1,feature2=get_embedding(f1,f2, input_dir=input_dir)
#G1,G2,feature1,feature2=get_embedding_edgelist(f1,f2, input_dir=input_dir)

# Each node keeps a fraction `kfactor` of the other graph's size as candidates.
kfactor=.10
K1=int(feature1.shape[0]*kfactor)
K2=int(feature2.shape[0]*kfactor)
print('K values: ',K1,K2)

p1=(f1.split('.')[0]).split('_')[0]
p2=(f2.split('.')[0]).split('_')[0]
Lf=input_dir+'L_'+str(kfactor)+'_'+p1+'_'+p2+'.graphml'
print(Lf)

# The neighbour searches skip the first embedding column (as the original
# cell did); the slices are computed once instead of twice each.
search1=feature1[:,1:].astype(float)
search2=feature2[:,1:].astype(float)

#nbrs1 = NearestNeighbors(n_neighbors=K1,metric='cosine').fit(search1)
nbrs1 = NearestNeighbors(n_neighbors=K1,algorithm='brute').fit(search1)
distances1, indices1 = nbrs1.kneighbors(search2)   # per G2 node: K1 nearest G1 nodes

#nbrs2 = NearestNeighbors(n_neighbors=K2,metric='cosine').fit(search2)
nbrs2 = NearestNeighbors(n_neighbors=K2,algorithm='brute').fit(search2)
distances2, indices2 = nbrs2.kneighbors(search1)   # per G1 node: K2 nearest G2 nodes

nodes1=[n[0] for n in G1.nodes(data=True)]
nodes2=[n[0] for n in G2.nodes(data=True)]

# Direction G1 -> G2: distance from each G1 node to its K2 nearest G2 nodes.
forward={}
for i in range(len(nodes1)):
    for jj in range(K2):
        w=distances2[i][jj]
        if math.isnan(w):
            continue
        forward[(nodes1[i],nodes2[indices2[i][jj]])]=w

# Direction G2 -> G1: only pairs already seen in the forward direction matter.
# An explicit membership test replaces the former bare try/except, which would
# also have hidden unrelated errors.
backward={}
for i in range(len(nodes2)):
    for jj in range(K1):
        w=distances1[i][jj]
        if math.isnan(w):
            continue
        key=(nodes1[indices1[i][jj]],nodes2[i])
        if key in forward:
            backward[key]=w

# Keep mutual pairs only, in forward insertion order, and turn the mean
# distance into a similarity weight.
L={}
for key,w_fwd in forward.items():
    if key in backward:
        val=(w_fwd+backward[key])*0.5
        val=1/(math.exp(val))*1000  ### 1000 * exp(-(w1+w2)/2)
        #val=(1-math.sqrt(w_fwd*backward[key]))*1000
        L[key]=val

# Vertex ids in the bipartite graph: G1 nodes first, then G2 nodes.
n1=len(nodes1)
n2=len(nodes2)
G1_ids={name:idx for idx,name in enumerate(nodes1)}
G2_ids={name:idx+n1 for idx,name in enumerate(nodes2)}

GL=ig.Graph()
GL.add_vertices(list(range(n1+n2)))
edges=[]
weights=[]
for (u,v),w in L.items():
    edges.append((G1_ids[u],G2_ids[v]))
    weights.append(w)

# Guard so an empty candidate set reports cleanly instead of raising IndexError.
if edges:
    print(edges[0],weights[0])
else:
    print('No mutual candidate pairs found')

GL.add_edges(edges)
GL.es['weight']=weights

print(GL.vcount(),GL.ecount(),GL.is_directed())

#ig.Graph.write_graphml(GL,Lf)
igraph2mtx(GL,n1,fname='test.mtx',bipartite=True)


<class 'numpy.ndarray'> (1004, 128)
[-8.26110187e-03 -1.80054998e-03 -5.17523792e-02  1.44326582e-02
  2.81463922e-02 -1.37194080e-02  7.08073301e-03 -2.60170468e-02
 -1.49758271e-02  5.99669924e-02 -9.95947737e-03 -2.59426321e-02
  2.09436593e-02 -1.07687387e-02 -2.13851929e-02  3.67507730e-04
 -3.95272835e-02 -1.28008052e-03  7.32216634e-03  1.20329670e-02
 -3.02200165e-02 -2.78745560e-03  1.58362633e-03 -5.22284766e-03
  1.19358404e-02  3.13000260e-03  2.59270476e-02 -1.58384306e-02
 -1.86229452e-02  2.65064821e-02  1.13999900e-02  5.82578699e-03
  6.24813565e-03 -9.67903529e-03  2.04617423e-02 -1.38778884e-02
  3.51834300e-02 -3.24862900e-02 -9.68117129e-03  1.35177837e-02
  1.01712522e-02 -8.58432000e-04  1.56332302e-02  1.42557057e-02
  1.02920998e-02 -4.63186518e-03  1.30562804e-02 -1.21349356e-02
  6.57102992e-04  2.39517354e-02 -5.07735259e-02 -8.64758012e-03
 -5.09000036e-03 -1.28657821e-01 -4.39887429e-02 -1.20270017e-01
  9.92372111e-03 -1.91488142e-02  6.56765575e-03 -4.03

In [14]:
# Compute GraphWave-based aligned embeddings for the configured pair of networks.
# NOTE: the recorded run of this cell ended with
# "AssertionError: Graph is not connected." (see the cell output).
G1,G2,feature1,feature2=get_embedding_gwave(f1,f2, input_dir=input_dir)
# Matching step, currently disabled; `aln` is therefore not produced here.
#aln=get_matching(G1,G2,feature1,feature2)


AssertionError: Graph is not connected.

In [None]:
# Recompute the NetMF-based aligned embeddings (LEDA loader) for the configured
# pair of networks; rebinds G1, G2, feature1 and feature2.
G1,G2,feature1,feature2=get_embedding(f1,f2, input_dir=input_dir)

In [None]:
# Write the alignment as one "<node1> <node2>" pair per line.
# NOTE(review): `aln` must already exist in the kernel; the only visible
# assignment to it (a get_matching call) is commented out in this notebook.
# The context manager closes the file even if a write fails part-way.
with open("/home/khan242/netAlign/results/synthetic_networks/danai/cone_yeast0_yeast5_Y2H1_0.aln",'w') as f:
    for (i,j) in aln:
        f.write(str(i)+" "+str(j)+"\n")

In [None]:
###### Embedding GraphWave #####
G1=nx.read_leda(input_dir+f1)
G=nx.convert_node_labels_to_integers(G1)
gv=kt.GraphWave()
gv.fit(G)
feature1=gv.get_embedding()

In [None]:
# Compare the container and element types of `e` and `feature1`.
# NOTE(review): `e` is not defined anywhere in the visible notebook — it
# presumably came from a deleted cell; confirm or define it before running.
print(type(e),type(feature1),type(e[0]),type(feature1[0]))

In [None]:
# Display the first entry (index 0) of feature1.
feature1[0]

In [None]:
# Build the complete bipartite graph for the configured pair of networks and
# export it as GraphML next to the input files.
GL = complete_bipartite(f1, f2, input_dir=input_dir)
print(GL.vcount(), GL.ecount(), GL.is_directed())

# Prefix of each file name: text before the first '.', then before the first '_'.
p1 = f1.split('.')[0].split('_')[0]
p2 = f2.split('.')[0].split('_')[0]

# File name pattern: L_<factor>_<p1>_<p2>.graphml, with factor fixed to 1 here.
Lf = input_dir + '_'.join(['L', str(1), p1, p2]) + '.graphml'
print(Lf)
GL.write_graphml(Lf)