In [1]:
import warnings
import igraph as ig
import networkx as nx
import collections
import scipy as sp
import numpy as np
import bMatching as bm
import importlib
from sklearn.metrics.pairwise import cosine_similarity


In [None]:
def community_info(G):
    """Cluster ``G`` and summarise the communities around every vertex.

    The graph is clustered with ``G.community_multilevel()`` and the
    resulting membership is stored in the ``"community"`` vertex attribute
    (this overwrites any previous value of that attribute).

    Parameters
    ----------
    G : graph object exposing ``community_multilevel()``, ``vs`` and
        ``neighbors()``; every vertex must carry an ``'id'`` attribute.

    Returns
    -------
    dict
        Maps each vertex ``'id'`` to ``[count, cid, freq_sorted]`` where
        ``count`` is the number of distinct communities among the vertex's
        neighbours, ``cid`` is the vertex's own community and ``freq_sorted``
        is a list of ``(community, frequency)`` pairs sorted by decreasing
        frequency.
    """
    C = G.community_multilevel()
    G.vs["community"] = C.membership

    cinfo = {}
    for u in G.vs:
        cid = u['community']
        freq = collections.Counter(G.vs[v]['community'] for v in G.neighbors(u))
        count = len(freq)

        # The vertex itself counts toward its own community, but only when
        # that community already appears among its neighbours.  Membership is
        # tested by value: the previous identity test (`is`) skipped the
        # increment whenever equal community ids were distinct int objects.
        if cid in freq:
            freq[cid] += 1

        freq_sorted = sorted(freq.items(), key=lambda item: item[1], reverse=True)
        cinfo[u['id']] = [count, cid, freq_sorted]

    return cinfo
        

In [None]:
def read_networks(f1,f2):
    """Load two LEDA graph files and return them as igraph graphs.

    Each file is parsed with ``nx.read_leda``, written out as GraphML to the
    scratch file ``graph.graphml`` in the current working directory, and then
    read back with ``ig.read``.  The scratch file is reused for both inputs
    and is left on disk afterwards.

    Parameters
    ----------
    f1, f2 : paths of the two LEDA files.

    Returns
    -------
    (G1, G2) : the graphs read from ``f1`` and ``f2`` respectively.
    """
    scratch_path = 'graph.graphml'
    converted = []
    for leda_path in (f1, f2):
        nx.write_graphml(nx.read_leda(leda_path), scratch_path)
        converted.append(ig.read(scratch_path, format="graphml"))

    G1, G2 = converted
    return G1, G2


In [None]:
def cluster_networks(G):
    """Cluster ``G`` in place and return it together with the clustering.

    The result of ``G.community_multilevel()`` is stored per vertex in the
    ``"community"`` attribute (via its ``membership`` list).

    Returns
    -------
    (G, clustering) : the same graph object and the clustering object.
    """
    clustering = G.community_multilevel()
    G.vs["community"] = clustering.membership
    return (G, clustering)



In [None]:
def get_k_hop_neightbors(G,v,k):
    """Return the vertices that enter the neighbourhood of ``v`` at hop ``k``.

    Parameters
    ----------
    G : graph exposing ``neighbors(v)`` and ``neighborhood(v, order)``.
    v : vertex index.
    k : hop count; ``1`` returns ``G.neighbors(v)`` unchanged.

    Returns
    -------
    list
        For ``k == 1`` the list returned by ``G.neighbors(v)``.  Otherwise
        the members of ``G.neighborhood(v, k)`` that are not in
        ``G.neighborhood(v, k - 1)``, in arbitrary (set) order.
    """
    # Compare by value: `k is 1` depended on CPython's small-int caching and
    # was False for equal values of another type (e.g. numpy integers).
    if k == 1:
        return G.neighbors(v)

    prev = set(G.neighborhood(v, k - 1))
    cur = set(G.neighborhood(v, k))
    return list(cur - prev)

In [None]:
def community_signature(G,C):
    """Compute, per vertex, the share of neighbours inside/outside its community.

    Parameters
    ----------
    G : graph whose vertices carry ``"community"`` and ``"id"`` attributes.
    C : clustering of ``G``; accepted for interface compatibility, not read.

    Returns
    -------
    dict
        Maps each vertex ``"id"`` to ``[inside, outside]``: the fraction of
        its neighbours that share its community and the fraction that do
        not.  A vertex without neighbours gets ``[0.0, 0.0]`` (this case
        previously raised ``ZeroDivisionError``).
    """
    com_sig = {}

    for u in G.vs:
        nbors = G.neighbors(u)
        deg = len(nbors)
        cidu = u["community"]

        inside = sum(1 for v in nbors if G.vs[v]["community"] == cidu)

        if deg == 0:
            # Isolated vertex: there is nothing to normalise by.
            sig = [0.0, 0.0]
        else:
            sig = [inside / deg, (deg - inside) / deg]

        com_sig[u["id"]] = sig

    return com_sig

In [None]:
def create_overlaps_matrix(G1,G2,L,beta):
    """Build the graph S that links pairs of candidate matches with adjacent endpoints.

    Every vertex of S stands for one candidate pair (an edge of the bipartite
    graph L).  S gets an edge between pair ``(u, v)`` and pair ``(i, j)``
    whenever ``i`` is a neighbour of ``u`` in G1 and ``j`` is a neighbour of
    ``v`` in G2 and both pairs exist in L.

    Parameters
    ----------
    G1 : network 1.
    G2 : network 2.
    L  : bipartite graph whose left side is the G1 vertices (ids
         ``0 .. n1-1``) and whose right side is the G2 vertices shifted by
         ``n1``; pair ids are the edge ids returned by ``L.get_eid``.
         ``None`` means a complete bipartite graph, in which case pair
         ``(u, v)`` is indexed as ``u * n2 + v``.
    beta : not used by this function.

    Returns
    -------
    The igraph graph S.  The full edge list is also printed.
    """
    n1 = G1.vcount()
    n2 = G2.vcount()

    def pair_id(left, right_local):
        # Id of the candidate pair (left in G1, right_local in G2), or -1
        # when L is given and holds no such cross edge.
        if L is None:
            return left * n2 + right_local
        return L.get_eid(left, right_local + n1, directed=False, error=False)

    # One vertex of S per candidate pair.
    nS = n1 * n2 if L is None else L.ecount()
    S = ig.Graph()
    S.add_vertices(list(range(nS)))

    edgeS = []
    for u in range(n1):
        nbor1 = G1.neighbors(u)

        for v_local in range(n2):
            uv = pair_id(u, v_local)
            if uv == -1:
                continue

            nbor2 = G2.neighbors(v_local)

            for i in nbor1:
                for jj in nbor2:
                    ij = pair_id(i, jj)
                    if ij == -1:
                        continue
                    edgeS.append((uv, ij))

    S.add_edges(edgeS)
    print(edgeS)

    return S


In [None]:
def create_overlaps_igraph(G1,C1,G2,C2,com_sig1,com_sig2,L,hop_coeff,alpha):
    """Build a weighted bipartite igraph graph scoring every G1/G2 vertex pair.

    Vertices ``0 .. n1-1`` of the result stand for the G1 vertices and
    vertices ``n1 .. n1+n2-1`` for the G2 vertices (``type`` 0 / 1).  The
    ``weight`` of edge ``(u, v + n1)`` is
    ``alpha * topological_score + (1 - alpha) * node_similarity``.

    Parameters
    ----------
    G1, G2 : igraph graphs whose vertices carry ``"community"`` and ``"id"``.
    C1, C2 : clusterings indexed by community id; ``C[cid]`` is intersected
        with neighbour indices, so it is presumably the member list of that
        community (verify against the clustering type used by the caller).
    com_sig1, com_sig2 : dicts mapping vertex ``"id"`` to a signature vector;
        the cosine similarity of the two signatures is the community factor.
    L : only tested against ``None``.  With ``None`` a complete bipartite
        graph is created.  NOTE(review): for any other value no edges are
        added, so every pair prints "Sparse L!!" and is skipped.
    hop_coeff : sequence of per-hop weights; its length is the hop count.
    alpha : mixing weight between topological score and node similarity.

    Returns
    -------
    The bipartite igraph graph with ``type`` vertex and ``weight`` edge
    attributes.
    """
    print("hop: ",hop_coeff)
    print("alpha: ",alpha)
    n1 = G1.vcount()
    n2 = G2.vcount()
    hops = len(hop_coeff)

    maxdeg_factor = G1.maxdegree() + G2.maxdegree()

    ##### Bipartite graph needs unique vertex names
    B = ig.Graph()
    G1_vid = list(range(n1))
    G2_vid = list(range(n1, n1 + n2))
    B.add_vertices(G1_vid)
    B.add_vertices(G2_vid)

    ### type denotes sides of bipartition
    B.vs['type'] = [0 if i < n1 else 1 for i in range(B.vcount())]

    if L is None:
        B.add_edges([(i, j) for i in G1_vid for j in G2_vid])
        B.es["weight"] = 0

    for u in range(n1):
        cid1 = G1.vs[u]["community"]
        for v in range(n2):
            cid2 = G2.vs[v]["community"]

            #### check whether u,v exist in B
            uv = B.get_eid(u, v + n1, directed=False, error=False)
            if uv == -1:
                print("Sparse L!!")
                continue

            ### Now calculate the topological score
            val = 0
            com_factor = 1

            for k in range(hops):
                nbor1 = get_k_hop_neightbors(G1, u, k + 1)
                nbor2 = get_k_hop_neightbors(G2, v, k + 1)

                if k > 0:
                    # Beyond the first hop only same-community vertices count.
                    nbor1 = list(set(nbor1).intersection(C1[cid1]))
                    nbor2 = list(set(nbor2).intersection(C2[cid2]))
                else:
                    t1 = com_sig1[G1.vs[u]["id"]]
                    t2 = com_sig2[G2.vs[v]["id"]]
                    # cosine_similarity returns a 1x1 array; index the scalar
                    # explicitly, since float() on a 1-element array is
                    # deprecated in NumPy.
                    com_factor = float(cosine_similarity([t1], [t2])[0][0])

                # Every neighbour pair joined by a cross edge contributes 1.0.
                # Community membership of the pair does not change the
                # contribution yet; that weighting is still to be assigned.
                hits = 0.0
                for i in nbor1:
                    for jj in nbor2:
                        ### shifting the vertex id for bipartite graph
                        if B.get_eid(i, jj + n1, directed=False, error=False) == -1:
                            print("L Sparse 2 !!")
                            continue
                        hits = hits + 1.0

                denom = len(nbor1) * len(nbor2) * 1.0
                # Same guard as create_overlaps: a hop with no neighbour
                # pairs contributes nothing instead of dividing by zero.
                if denom > 0:
                    val = val + hits / denom * hop_coeff[k]

            deg_diff = abs(G1.degree(u) - G2.degree(v)) + 1
            if maxdeg_factor:
                deg_factor = ((G1.degree(u) + G2.degree(v)) / deg_diff) / maxdeg_factor
            else:
                # Both graphs are edgeless; there is no degree information.
                deg_factor = 0.0

            if val > 1.01 or deg_factor > 1.01 or com_factor > 1.01:
                print("Factoring problem: ",val,", ",deg_factor,", ",com_factor)

            node_similarity = .5 * deg_factor + .5 * com_factor

            B.es[uv]['weight'] = alpha * val + (1.0 - alpha) * node_similarity

    return B


In [None]:
def create_overlaps(G1,C1,G2,C2,com_sig1,com_sig2,L,hop_coeff,alpha):
    """Score every G1/G2 vertex pair and return the scores as a sparse matrix.

    Rows/columns ``0 .. n1-1`` stand for the G1 vertices and
    ``n1 .. n1+n2-1`` for the G2 vertices.  For every pair ``(u, v)`` the
    value ``alpha * topological_score + (1 - alpha) * node_similarity`` is
    stored at ``(u, v + n1)`` and, symmetrically, at ``(v + n1, u)``.

    Parameters
    ----------
    G1, G2 : igraph graphs whose vertices carry ``"community"`` and ``"id"``.
    C1, C2 : clusterings indexed by community id; ``C[cid]`` is intersected
        with neighbour indices, so it is presumably the member list of that
        community (verify against the clustering type used by the caller).
    com_sig1, com_sig2 : dicts mapping vertex ``"id"`` to a signature vector.
    L : currently not read; all pairs are scored as if the candidate graph
        were complete (restricting to the edges of L is still a TODO).
    hop_coeff : sequence of per-hop weights; its length is the hop count.
    alpha : mixing weight between topological score and node similarity.

    Returns
    -------
    scipy.sparse.coo_matrix of shape ``(n1 + n2, n1 + n2)``.
    """
    print("hop: ",hop_coeff)
    print("alpha: ",alpha)
    n1 = G1.vcount()
    n2 = G2.vcount()
    hops = len(hop_coeff)

    maxdeg_factor = G1.maxdegree() + G2.maxdegree()

    row = []
    col = []
    data = []

    for u in range(n1):
        cid1 = G1.vs[u]["community"]
        for v in range(n2):
            cid2 = G2.vs[v]["community"]

            ##### TODO: NEED TO CHECK WHETHER (u,v) has edge in L

            ### Now calculate the topological score
            val = 0
            com_factor = 1

            for k in range(hops):
                nbor1 = get_k_hop_neightbors(G1, u, k + 1)
                nbor2 = get_k_hop_neightbors(G2, v, k + 1)

                if k > 0:
                    # Beyond the first hop only same-community vertices count.
                    nbor1 = list(set(nbor1).intersection(C1[cid1]))
                    nbor2 = list(set(nbor2).intersection(C2[cid2]))
                else:
                    t1 = com_sig1[G1.vs[u]["id"]]
                    t2 = com_sig2[G2.vs[v]["id"]]
                    # cosine_similarity returns a 1x1 array; index the scalar
                    # explicitly, since float() on a 1-element array is
                    # deprecated in NumPy.
                    com_factor = float(cosine_similarity([t1], [t2])[0][0])

                ### TODO: check whether the neighbour pairs have a cross edge
                # Every neighbour pair contributes 1.0 regardless of community
                # membership, so the accumulated numerator equals the number
                # of pairs.
                denom = len(nbor1) * len(nbor2) * 1.0
                if denom > 0:
                    val = val + denom / denom * hop_coeff[k]

            deg_diff = abs(G1.degree(u) - G2.degree(v)) + 1
            if maxdeg_factor:
                deg_factor = ((G1.degree(u) + G2.degree(v)) / deg_diff) / maxdeg_factor
            else:
                # Both graphs are edgeless; there is no degree information.
                deg_factor = 0.0

            # The degree term is currently switched off (weight 0.0).
            node_similarity = 0.0 * deg_factor + 1 * com_factor

            val = alpha * val + (1.0 - alpha) * node_similarity

            ### Store the entry and its mirror image (symmetrize)
            row.extend((u, v + n1))
            col.extend((v + n1, u))
            data.extend((val, val))

    ### Code for scipy coo_matrix
    row = np.array(row)
    col = np.array(col)
    data = np.array(data)
    B = sp.sparse.coo_matrix((data, (row, col)), shape=(n1 + n2, n1 + n2))
    return B


In [None]:
def get_alignment(G1,G2,M):
    """Translate a matching into a list of ``(G1 id, G2 id)`` pairs.

    Parameters
    ----------
    G1, G2 : graphs exposing ``vcount()`` and ``vs[i]['id']``.
    M : indexable matching; ``M[i][0][1]`` is read as the partner of G1
        vertex ``i``, expressed as a G2 vertex index shifted by ``n1``
        (presumably the output of ``bm.bSuitor`` - verify against that
        module).

    Returns
    -------
    list of ``(u, v)`` tuples, one per entry ``i < n1`` of ``M``, in order.
    """
    n1 = G1.vcount()
    # Only the first n1 entries of M belong to G1 vertices.
    left_side = range(min(len(M), n1))
    return [
        (G1.vs[i]['id'], G2.vs[M[i][0][1] - n1]['id'])
        for i in left_side
    ]
            

In [None]:
def save_alignment(L,filename):
    """Write an alignment to ``filename``, one ``"u v"`` pair per line.

    Parameters
    ----------
    L : iterable of ``(u, v)`` pairs; each element is converted with
        ``str`` so non-string ids are accepted as well.
    filename : path of the output file (overwritten if it exists).
    """
    # The context manager closes the file even if a malformed pair raises.
    with open(filename, "w") as fl:
        for (u, v) in L:
            fl.write(str(u) + " " + str(v) + "\n")

In [None]:
def write_mtx(G,u,v,m,filename):
    """Write ``G`` as an edge list preceded by a two-line header.

    The output file consists of the line ``%%mtx file header``, a line with
    ``u v m`` separated by single spaces, and then the edge list produced by
    ``G.write(..., format="edgelist")``.

    Parameters
    ----------
    G : graph exposing ``write(path, format=...)``.
    u, v, m : values written on the second header line (converted with
        ``str``).
    filename : path of the output file (overwritten if it exists).
    """
    import os
    import tempfile

    # Use a private scratch file instead of a fixed "temp.txt" in the working
    # directory, so nothing is left behind and parallel runs do not collide.
    fd, scratch = tempfile.mkstemp(suffix=".txt")
    os.close(fd)
    try:
        G.write(scratch, format="edgelist")

        with open(filename, "w") as fl, open(scratch, "r") as ft:
            fl.write("%%mtx file header")
            fl.write('\n')
            fl.write(str(u) + " " + str(v) + " " + str(m))
            fl.write('\n')
            for line in ft:
                fl.write(line)
    finally:
        os.remove(scratch)

In [None]:
def netAlign(f1,f2,resf=None,L=None,hop_coeff=None,alpha=0.5,input_dir=None,res_dir=None):
    """Align two networks and write the alignment(s) to ``res_dir``.

    The two LEDA files are read, clustered, scored with ``create_overlaps``
    and matched with ``bm.bSuitor(overlaps, 1)``; the resulting pairs are
    written with ``save_alignment``.

    Parameters
    ----------
    f1, f2 : file names of the two networks, relative to ``input_dir``.
    resf : name of the result file (default ``"align.aln"``).
    L : only used to select the mode, its content is never read.  When it is
        ``None`` the hop coefficients ``[1]`` are used regardless of
        ``hop_coeff``.
    hop_coeff : sequence of per-hop weights, or the string ``"ALL"`` to run
        the three presets ``[1]``, ``[1, 0.5]`` and ``[1, 0.5, 0.25]``; the
        second and third results get ``_2`` / ``_3`` inserted before the
        extension of ``resf``.  ``None`` falls back to ``[1]``.
    alpha : mixing weight passed on to ``create_overlaps``.
    input_dir, res_dir : directory prefixes (concatenated as strings, so they
        must end with a path separator); default to the original author's
        machine-specific directories.

    Notes
    -----
    The ``L is None`` / ``"ALL"`` modes used to call
    ``maximum_bipartite_matching`` on the value returned by
    ``create_overlaps``.  That value is a scipy sparse matrix, which has no
    such method, so those modes could not run.  All modes now use the same
    ``bm.bSuitor`` matching as the explicit-coefficient mode, whose output is
    what ``get_alignment`` indexes.
    """
    import os

    if input_dir is None:
        input_dir="/Users/khan242/PNNL/netAlign/data/synthetic networks/"
    if res_dir is None:
        res_dir="/Users/khan242/PNNL/netAlign/results/"

    if resf is None:
        resf="align.aln"

    # splitext copes with names that contain no dot or several dots.
    stem, ext = os.path.splitext(resf)

    #### Reading networks
    G1,G2 = read_networks(input_dir+f1, input_dir+f2)

    #### Cluster networks
    G1,C1 = cluster_networks(G1)
    G2,C2 = cluster_networks(G2)

    com_sig1 = community_signature(G1,C1)
    com_sig2 = community_signature(G2,C2)

    def align_and_save(coeffs, out_name):
        # Score all pairs, match them and write the alignment to res_dir.
        overlaps = create_overlaps(G1,C1,G2,C2,com_sig1,com_sig2,None,coeffs,alpha)
        matching = bm.bSuitor(overlaps,1)
        save_alignment(get_alignment(G1,G2,matching), res_dir+out_name)

    # Compare by value; `is "ALL"` relied on string interning.
    run_all = isinstance(hop_coeff, str) and hop_coeff == "ALL"

    if L is None or run_all:
        align_and_save([1], resf)

        if run_all:
            align_and_save([1,0.5], stem+"_2"+ext)
            align_and_save([1,0.5,0.25], stem+"_3"+ext)
    else:
        align_and_save([1] if hop_coeff is None else hop_coeff, resf)

In [None]:
if __name__ == '__main__':

    warnings.filterwarnings("ignore")

    input_dir="/Users/khan242/PNNL/netAlign/data/synthetic networks/"
    res_dir="/Users/khan242/PNNL/netAlign/results/"

    # Align the base network against each perturbed variant.
    f1="yeast0_Y2H1.gw"
    for variant in (10, 15, 20, 25):
        f2="yeast%d_Y2H1.gw" % variant
        resf="yeast0_yeast%d_Y2H1.aln" % variant

        # The directories must be passed by keyword: positionally they would
        # land in `alpha` and `input_dir` (the 6th and 7th parameters).
        netAlign(f1,f2,resf,None,"ALL",input_dir=input_dir,res_dir=res_dir)
    

In [None]:
# Re-import the local bMatching module so that edits made to it on disk are
# picked up without restarting the kernel.
importlib.reload(bm)

# Data and result directories (absolute paths on the original author's machine).
input_dir = "/Users/khan242/PNNL/netAlign/data/synthetic networks/"
#input_dir="/Users/khan242/PNNL/netAlign/data/real world networks/"
res_dir = "/Users/khan242/PNNL/netAlign/results/"

# Pair of networks used by the experiment cells below.
#f1="test1.gw"
#f2="test2.gw"
f1, f2 = "yeast0_Y2H1.gw", "yeast5_Y2H1.gw"

#G1,G2=read_networks(input_dir+f1,input_dir+f2)

In [None]:

resf="test.aln"
netAlign(f1,f2,resf,[],[1],.75,input_dir,res_dir)

# Show the file that was just written.  Reading res_dir+resf keeps this in
# sync with the call above; the former `!cat ../results/test.aln` only hit
# the same file when the working directory happened to sit next to res_dir.
with open(res_dir+resf) as aln_file:
    print(aln_file.read(), end="")

In [None]:
# One alignment run with a single hop and alpha = 0.95.
resf = "yeast0_yeast5_Y2H1.aln"
netAlign(f1, f2, resf=resf, L=[], hop_coeff=[1], alpha=0.95,
         input_dir=input_dir, res_dir=res_dir)

In [None]:
# Four runs over the same pair of networks: (result file, hop coefficients, alpha).
runs = [
    ("yeast0_yeast5_Y2H1.aln",   [1],       1.0),
    ("yeast0_yeast5_Y2H1_2.aln", [1],       0.9),
    ("yeast0_yeast5_Y2H1_3.aln", [0.8,0.2], 1.0),
    ("yeast0_yeast5_Y2H1_4.aln", [0.8,0.2], 0.9),
]
for resf, run_hops, run_alpha in runs:
    netAlign(f1,f2,resf,[],run_hops,run_alpha,input_dir,res_dir)


In [None]:
  

#### Reading networks
G1,G2=read_networks(input_dir+f1,input_dir+f2)

#### Cluster networks
G1,C1 =cluster_networks(G1)
G2,C2 =cluster_networks(G2)

#### Community signatures (required by create_overlaps_igraph)
com_sig1=community_signature(G1,C1)
com_sig2=community_signature(G2,C2)


def _alignment_from_matching(matching):
    """Turn an igraph Matching into (G1 id, G2 id) pairs.

    ``matching.matching[i]`` is the vertex matched to vertex ``i`` or -1;
    G2 vertices are shifted by ``G1.vcount()`` in the bipartite graph.
    """
    n1=G1.vcount()
    return [(G1.vs[i]['id'],G2.vs[j-n1]['id'])
            for i,j in enumerate(matching.matching[:n1]) if j != -1]


# The igraph variant is the one that returns a graph with a 'weight' edge
# attribute; create_overlaps returns a sparse matrix and takes nine
# arguments, so the former six-argument call could not run.  alpha=0.5 is
# the netAlign default.
L=create_overlaps_igraph(G1,C1,G2,C2,com_sig1,com_sig2,None,[1],0.5)
M=L.maximum_bipartite_matching(weights='weight')
print(M.matching)
AL=_alignment_from_matching(M)

L1=create_overlaps_igraph(G1,C1,G2,C2,com_sig1,com_sig2,None,[1],0.5)
print(L1.vcount())
print(L1.ecount())
print(L1.vs[0],L1.vs[1])
print(L1.es[0],L1.es[1])
M1=L1.maximum_bipartite_matching(weights='weight')
AL1=_alignment_from_matching(M1)

In [None]:
#### Community statistics for both networks

cinfo1=community_info(G1)
# The second summary belongs to G2 (it was computed from G1 by mistake).
cinfo2=community_info(G2)
print(cinfo1)

In [None]:
# For network 1: distribution of the number of neighbouring communities and
# the share of vertices whose own community is the most frequent one.
count=[]
dominant=0
for (key,val) in cinfo1.items():

    count.append(val[0])
    # val[2] is empty for a vertex without neighbours; such a vertex is
    # reported below instead of raising IndexError.
    if val[2] and val[1] == val[2][0][0]:
        dominant=dominant+1
    else:
        print(key,val)
countS=collections.Counter(count)
# Normalise by the dict being analysed (`cinfo` is not defined anywhere).
print(countS,dominant/len(cinfo1))

In [None]:
# For network 2: distribution of the number of neighbouring communities and
# the share of vertices whose own community is the most frequent one.
count=[]
dominant=0
for (key,val) in cinfo2.items():

    count.append(val[0])
    # val[2] is empty for a vertex without neighbours; such a vertex is
    # reported below instead of raising IndexError.
    if val[2] and val[1] == val[2][0][0]:
        dominant=dominant+1
    else:
        print(key,val)
countS=collections.Counter(count)
# Normalise by the dict being analysed (`cinfo` is not defined anywhere).
print(countS,dominant/len(cinfo2))

In [None]:
print(L.vs.attribute_names(),L.ecount(),L.vcount(),L.is_bipartite(),L.is_directed())
# `folder` is not defined anywhere in this notebook; res_dir is the results
# directory used by every other cell.
save_alignment(AL,res_dir+"yeast_human_Y2H2_1.aln")

In [None]:
# Vertex ids that occur in both networks.
A = set(G1.vs['id'])
B = set(G2.vs['id'])
print(A & B)

In [None]:
# Print the target vertex of at most the first 20 edges of L.
count = 0
for count, edge in enumerate(L.es, start=1):
    print(edge.target)
    if count == 20:
        break

In [None]:

print(G1.vcount(),G2.vcount(),L.vcount(),AL)

# Show every aligned pair that involves the vertex 'MCM10'.  Strings are
# compared by value; `is` only matched when both happened to be the same
# interned object.
for (u,v) in AL:
    if u == 'MCM10' or v == 'MCM10':
        print(u,v)

In [None]:
# Quick check of how a result file name splits on the dot.
t = "align.aln"
s = t.split(sep=".")
print(s, t, sep="\n")

In [None]:
# Re-cluster G2 and inspect its clustering and community signatures.
G2, C2 = cluster_networks(G2)
com_sig = community_signature(G2, C2)
print(C2, com_sig, sep="\n")

In [None]:
# First entry of the G2 clustering and the number of entries it holds.
first_entry = C2[0]
print(first_entry, len(C2))

In [None]:
# Per-vertex community summary of G1 (this re-clusters G1 as a side effect).
cinfo_g1 = community_info(G1)
print(cinfo_g1)

In [None]:
# Cosine similarity of two example community signatures.
t1, t2 = [1, 0], [.67, .33]
similarity = cosine_similarity([t1], [t2])
print(similarity)

In [None]:
# Fresh empty graphs for the toy example: two networks and the candidate graph L.
G1, G2, L = ig.Graph(), ig.Graph(), ig.Graph()

In [None]:
# Toy networks: a triangle (G1) and a five-vertex graph (G2).
g1_edges = [(0,1),(0,2),(1,2)]
g2_edges = [(0,2),(1,2),(2,3),(2,4),(3,4)]
G1.add_vertices([0,1,2])
G2.add_vertices([0,1,2,3,4])
G1.add_edges(g1_edges)
G2.add_edges(g2_edges)

# Candidate pairs: vertices 0-2 are the G1 side, 3-7 the G2 side (shifted by 3).
candidate_pairs = [(0,5),(1,3),(1,6),(2,4),(2,7)]
L.add_vertices([0,1,2,3,4,5,6,7])
L.add_edges(candidate_pairs)

In [None]:
# Overlap graph for the toy networks with no candidate graph (L=None).
S = create_overlaps_matrix(G1, G2, L=None, beta=0)

In [None]:
# Overlap graph for the toy networks restricted to the candidate pairs in L.
SL = create_overlaps_matrix(G1, G2, L=L, beta=0)

In [None]:
# Sparse adjacency matrix of the overlap graph SL.
S=SL.get_adjacency_sparse()
print(type(S))
#L=sp.sparse(S)
# get_adjacency_sparse is a Graph method; calling it through the class works
# whether or not the igraph module also exposes a function of that name.
ig.Graph.get_adjacency_sparse(SL)